/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.channel.epoll;
import io.netty.channel.EventLoop;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.SelectStrategy;
import io.netty.channel.SingleThreadEventLoop;
import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
import io.netty.channel.unix.FileDescriptor;
import io.netty.channel.unix.IovArray;
import io.netty.util.IntSupplier;
import io.netty.util.collection.IntObjectHashMap;
import io.netty.util.collection.IntObjectMap;
import io.netty.util.concurrent.RejectedExecutionHandler;
import io.netty.util.internal.ObjectUtil;
import io.netty.util.internal.PlatformDependent;
import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
import static java.lang.Math.min;
/**
 * {@link EventLoop} which uses epoll under the covers. Only works on Linux!
 */
final class EpollEventLoop extends SingleThreadEventLoop {
// Logger used for warnings raised by the loop and during cleanup.
private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);
// Atomic accessor for the volatile 'wakenUp' field: wakeup() does a 0 -> 1 compare-and-set,
// run() resets the flag to 0 right before waiting.
private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");
static {
// Ensure JNI is initialized by the time this class is loaded.
// We use unix-common methods in this class which are backed by JNI methods.
Epoll.ensureAvailability();
}
// The epoll descriptor itself; created in the constructor and closed in cleanup().
private final FileDescriptor epollFd;
// Descriptor written to by wakeup() in order to wake up a pending epoll_wait(...).
private final FileDescriptor eventFd;
// Timer descriptor that is registered with epoll and handed to Native.epollWait(...) together with the timeout.
private final FileDescriptor timerFd;
// Maps the int value of a channel's file descriptor to the channel; filled by add(...), emptied by remove(...)
// and consulted by processReady(...).
private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096);
// true when the constructor was given maxEvents == 0; run() then enlarges 'events' whenever it was filled completely.
private final boolean allowGrowing;
// Native array that receives the ready events of each epoll wait.
private final EpollEventArray events;
// Shared IovArray handed out (cleared) by cleanArray() and released in cleanup().
private final IovArray iovArray = new IovArray();
// Strategy consulted at the top of every run() iteration.
private final SelectStrategy selectStrategy;
// Supplier passed to the select strategy; delegates to epollWaitNow().
private final IntSupplier selectNowSupplier = new IntSupplier() {
@Override
public int get() throws Exception {
return epollWaitNow();
}
};
// Task submitted by pendingTasks() when it is called from outside the event loop, so that the
// queue size is always read on the event loop thread.
private final Callable<Integer> pendingTasksCallable = new Callable<Integer>() {
@Override
public Integer call() throws Exception {
return EpollEventLoop.super.pendingTasks();
}
};
// 0 or 1; 1 means a wake-up was already requested (see wakeup(...) and run()).
private volatile int wakenUp;
// Desired percentage of loop time spent on I/O; validated by setIoRatio(...) to be in (0, 100].
private volatile int ioRatio = 50;
// Number of whole days contained in 999999999 seconds; upper bound enforced by validateScheduled(...).
// See http://man7.org/linux/man-pages/man2/timerfd_create.2.html.
static final long MAX_SCHEDULED_DAYS = TimeUnit.SECONDS.toDays(999999999);
/**
 * Creates the event loop together with the epoll, event and timer file descriptors it relies on.
 *
 * @param parent                   passed through to the super constructor
 * @param executor                 passed through to the super constructor
 * @param maxEvents                fixed capacity of the event array, or {@code 0} to start with 4096 entries
 *                                 and let the array grow on demand
 * @param strategy                 the {@link SelectStrategy} consulted by {@code run()}; must not be {@code null}
 * @param rejectedExecutionHandler passed through to the super constructor
 * @throws IllegalStateException   if the event or timer descriptor cannot be registered with epoll
 */
EpollEventLoop(EventLoopGroup parent, Executor executor, int maxEvents,
               SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) {
    super(parent, executor, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler);
    selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy");

    // A maxEvents of 0 means "no fixed size": begin with 4096 slots and allow growth later.
    allowGrowing = maxEvents == 0;
    events = new EpollEventArray(allowGrowing ? 4096 : maxEvents);

    // Track every descriptor in a local as well, so a partially failed setup can be unwound below.
    FileDescriptor createdEpollFd = null;
    FileDescriptor createdEventFd = null;
    FileDescriptor createdTimerFd = null;
    boolean initialized = false;
    try {
        createdEpollFd = Native.newEpollCreate();
        this.epollFd = createdEpollFd;

        createdEventFd = Native.newEventFd();
        this.eventFd = createdEventFd;
        try {
            Native.epollCtlAdd(createdEpollFd.intValue(), createdEventFd.intValue(), Native.EPOLLIN);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e);
        }

        createdTimerFd = Native.newTimerFd();
        this.timerFd = createdTimerFd;
        try {
            // The timer descriptor is registered edge-triggered.
            Native.epollCtlAdd(createdEpollFd.intValue(), createdTimerFd.intValue(),
                    Native.EPOLLIN | Native.EPOLLET);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e);
        }
        initialized = true;
    } finally {
        if (!initialized) {
            // Release whatever was created before the failure, in creation order.
            closeAfterFailedInit(createdEpollFd);
            closeAfterFailedInit(createdEventFd);
            closeAfterFailedInit(createdTimerFd);
        }
    }
}

// Best-effort close of a descriptor created by a constructor call that did not complete; null is skipped.
private static void closeAfterFailedInit(FileDescriptor fd) {
    if (fd == null) {
        return;
    }
    try {
        fd.close();
    } catch (Exception ignored) {
        // ignore
    }
}
/**
 * Clears the {@link IovArray} owned by this {@link EventLoop} and returns it, ready to be used for writes.
 * The same instance is returned on every call.
 */
IovArray cleanArray() {
    final IovArray shared = iovArray;
    shared.clear();
    return shared;
}
/**
 * Wakes up a pending epoll wait when called from outside the event loop.
 * Only the caller that flips {@code wakenUp} from 0 to 1 writes to the event descriptor.
 */
@Override
protected void wakeup(boolean inEventLoop) {
    if (inEventLoop) {
        // Nothing to do when already running on the event loop.
        return;
    }
    if (WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) {
        // write to the evfd which will then wake-up epoll_wait(...)
        Native.eventFdWrite(eventFd.intValue(), 1L);
    }
}
/**
 * Register the given epoll with this {@link EventLoop}.
 */
void add(AbstractEpollChannel ch) throws IOException {
    assert inEventLoop();
    final int channelFd = ch.socket.intValue();
    // Register with epoll first; the channel is only recorded in the map if that succeeded.
    Native.epollCtlAdd(epollFd.intValue(), channelFd, ch.flags);
    channels.put(channelFd, ch);
}
/**
 * The flags of the given epoll were modified, so update the registration.
 */
void modify(AbstractEpollChannel ch) throws IOException {
    assert inEventLoop();
    final int epoll = epollFd.intValue();
    final int channelFd = ch.socket.intValue();
    // Push the channel's current flags to its existing epoll registration.
    Native.epollCtlMod(epoll, channelFd, ch.flags);
}
/**
 * Deregister the given epoll from this {@link EventLoop}.
 */
void remove(AbstractEpollChannel ch) throws IOException {
    assert inEventLoop();
    if (!ch.isOpen()) {
        // Nothing is touched for a channel that is no longer open.
        return;
    }
    final int key = ch.socket.intValue();
    if (channels.remove(key) == null) {
        // The channel was not registered with this loop.
        return;
    }
    // Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
    // removed once the file-descriptor is closed.
    Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue());
}
/**
 * Creates the task queue as a multi-producer/single-consumer queue.
 * The queue is unbounded when {@code maxPendingTasks} is {@link Integer#MAX_VALUE}, otherwise it is
 * created with {@code maxPendingTasks} as its argument.
 */
@Override
protected Queue<Runnable> newTaskQueue(int maxPendingTasks) {
    // This event loop never calls takeTask()
    if (maxPendingTasks == Integer.MAX_VALUE) {
        return PlatformDependent.<Runnable>newMpscQueue();
    }
    return PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks);
}
/**
 * Returns the number of pending tasks. When called from another thread the lookup is submitted to
 * the event loop and this method blocks uninterruptibly until the result is available.
 */
@Override
public int pendingTasks() {
    // As we use a MpscQueue we need to ensure pendingTasks() is only executed from within the EventLoop as
    // otherwise we may see unexpected behavior (as size() is only allowed to be called by a single consumer).
    // See https://github.com/netty/netty/issues/5297
    if (!inEventLoop()) {
        return submit(pendingTasksCallable).syncUninterruptibly().getNow();
    }
    return super.pendingTasks();
}
/**
 * Returns the percentage of the desired amount of time spent for I/O in the event loop.
 */
public int getIoRatio() {
    // Plain read of the volatile field; the value is whatever setIoRatio(...) stored last (50 initially).
    return this.ioRatio;
}
/**
 * Sets the percentage of the desired amount of time spent for I/O in the event loop. The default value is
 * {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
 */
public void setIoRatio(int ioRatio) {
    // Accepted range is 1..100 inclusive; anything else is rejected before the field is touched.
    final boolean withinRange = ioRatio > 0 && ioRatio <= 100;
    if (!withinRange) {
        throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)");
    }
    this.ioRatio = ioRatio;
}
/**
 * Waits for epoll events, using the delay until the next scheduled task as the timeout.
 *
 * @param oldWakeup whether {@code wakenUp} was 1 right before this call
 * @return the value returned by the native epoll wait
 * @throws IOException if the native call fails
 */
private int epollWait(boolean oldWakeup) throws IOException {
    // If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event.
    // So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended
    // until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed
    // in pipeline.
    if (oldWakeup && hasTasks()) {
        return epollWaitNow();
    }

    // Split the delay into whole seconds and the nanosecond remainder, each clamped to the int range.
    final long delayNs = delayNanos(System.nanoTime());
    final int wholeSeconds = (int) min(delayNs / 1000000000L, Integer.MAX_VALUE);
    final int leftoverNanos = (int) min(delayNs - wholeSeconds * 1000000000L, Integer.MAX_VALUE);
    return Native.epollWait(epollFd, events, timerFd, wholeSeconds, leftoverNanos);
}
/**
 * Performs an epoll wait with a timeout of zero seconds and zero nanoseconds.
 *
 * @return the value returned by the native epoll wait
 * @throws IOException if the native call fails
 */
private int epollWaitNow() throws IOException {
    final int ready = Native.epollWait(epollFd, events, timerFd, 0, 0);
    return ready;
}
/**
 * Main loop of this event loop. Each iteration asks the select strategy what to do, optionally waits for
 * epoll events, processes the ready events, runs queued tasks and finally checks for shutdown.
 * The loop is only left once {@code confirmShutdown()} returns {@code true} while shutting down.
 */
@Override
protected void run() {
for (;;) {
try {
// 'strategy' is either one of the SelectStrategy constants or the result of a wait call.
int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks());
switch (strategy) {
case SelectStrategy.CONTINUE:
// Restarts the for-loop immediately; the shutdown check below is skipped for this iteration.
continue;
case SelectStrategy.SELECT:
// Reset wakenUp to 0 and tell epollWait(...) whether it was 1 before.
strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1);
// 'wakenUp.compareAndSet(false, true)' is always evaluated
// before calling 'selector.wakeup()' to reduce the wake-up
// overhead. (Selector.wakeup() is an expensive operation.)
//
// However, there is a race condition in this approach.
// The race condition is triggered when 'wakenUp' is set to
// true too early.
//
// 'wakenUp' is set to true too early if:
// 1) Selector is waken up between 'wakenUp.set(false)' and
// 'selector.select(...)'. (BAD)
// 2) Selector is waken up between 'selector.select(...)' and
// 'if (wakenUp.get()) { ... }'. (OK)
//
// In the first case, 'wakenUp' is set to true and the
// following 'selector.select(...)' will wake up immediately.
// Until 'wakenUp' is set to false again in the next round,
// 'wakenUp.compareAndSet(false, true)' will fail, and therefore
// any attempt to wake up the Selector will fail, too, causing
// the following 'selector.select(...)' call to block
// unnecessarily.
//
// To fix this problem, we wake up the selector again if wakenUp
// is true immediately after selector.select(...).
// It is inefficient in that it wakes up the selector for both
// the first case (BAD - wake-up required) and the second case
// (OK - no wake-up required).
if (wakenUp == 1) {
Native.eventFdWrite(eventFd.intValue(), 1L);
}
// fallthrough
default:
}
// Snapshot the volatile ioRatio so the same value is used for the whole iteration.
final int ioRatio = this.ioRatio;
if (ioRatio == 100) {
// All time may go to I/O: process events, then run every queued task without a time limit.
try {
if (strategy > 0) {
processReady(events, strategy);
}
} finally {
// Ensure we always run tasks.
runAllTasks();
}
} else {
// Measure the time spent on I/O and give the tasks a budget proportional to it.
final long ioStartTime = System.nanoTime();
try {
if (strategy > 0) {
processReady(events, strategy);
}
} finally {
// Ensure we always run tasks.
final long ioTime = System.nanoTime() - ioStartTime;
runAllTasks(ioTime * (100 - ioRatio) / ioRatio);
}
}
if (allowGrowing && strategy == events.length()) {
//increase the size of the array as we needed the whole space for the events
events.increase();
}
} catch (Throwable t) {
// Logs the failure and sleeps briefly, see handleLoopException(...).
handleLoopException(t);
}
// Always handle shutdown even if the loop processing threw an exception.
try {
if (isShuttingDown()) {
closeAll();
if (confirmShutdown()) {
break;
}
}
} catch (Throwable t) {
handleLoopException(t);
}
}
}
/**
 * Logs an unexpected failure of the loop at warn level and then pauses the calling thread for one second.
 *
 * @param t the failure that escaped one loop iteration
 */
private static void handleLoopException(Throwable t) {
    logger.warn("Unexpected exception in the selector loop.", t);

    // Prevent possible consecutive immediate failures that lead to
    // excessive CPU consumption.
    try {
        TimeUnit.SECONDS.sleep(1);
    } catch (InterruptedException ignored) {
        // Ignore.
    }
}
/**
 * Closes every channel currently registered with this event loop.
 * A zero-timeout epoll wait is performed first; an {@link IOException} from it is ignored.
 */
private void closeAll() {
    try {
        epollWaitNow();
    } catch (IOException ignore) {
        // ignore on close
    }

    // Using the intermediate collection to prevent ConcurrentModificationException.
    // In the `close()` method, the channel is deleted from `channels` map.
    final Collection<AbstractEpollChannel> snapshot = new ArrayList<AbstractEpollChannel>(channels.values());
    for (AbstractEpollChannel registered : snapshot) {
        registered.unsafe().close(registered.unsafe().voidPromise());
    }
}
/**
 * Dispatches the first {@code ready} entries of {@code events}.
 * Events for the event descriptor and the timer descriptor are consumed by reading from them; events for a
 * registered channel are forwarded to its unsafe; events for an unknown descriptor cause that descriptor to
 * be removed from epoll.
 *
 * @param events the array holding the events of the last wait
 * @param ready  the number of entries of {@code events} to process
 */
private void processReady(EpollEventArray events, int ready) {
for (int i = 0; i < ready; i ++) {
final int fd = events.fd(i);
if (fd == eventFd.intValue()) {
// consume wakeup event.
Native.eventFdRead(fd);
} else if (fd == timerFd.intValue()) {
// consume wakeup event, necessary because the timer is added with ET mode.
Native.timerFdRead(fd);
} else {
final long ev = events.events(i);
AbstractEpollChannel ch = channels.get(fd);
if (ch != null) {
// Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100%
// sure about it!
// Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the
// past.
AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe();
// First check for EPOLLOUT as we may need to fail the connect ChannelPromise before trying
// to read from the file descriptor.
// See https://github.com/netty/netty/issues/3785
//
// It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused.
// In either case epollOutReady() will do the correct thing (finish connecting, or fail
// the connection).
// See https://github.com/netty/netty/issues/3848
if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) {
// Force flush of data as the epoll is writable again
unsafe.epollOutReady();
}
// Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input.
// See https://github.com/netty/netty/issues/4317.
//
// If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will
// try to read from the underlying file descriptor and so notify the user about the error.
if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) {
// The Channel is still open and there is something to read. Do it now.
unsafe.epollInReady();
}
// Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case
// we may close the channel directly or try to read more data depending on the state of the
// Channel and also depending on the AbstractEpollChannel subtype.
if ((ev & Native.EPOLLRDHUP) != 0) {
unsafe.epollRdHupReady();
}
} else {
// We received an event for an fd which we do not use anymore. Remove it from the epoll_event set.
try {
Native.epollCtlDel(epollFd.intValue(), fd);
} catch (IOException ignore) {
// This can happen but is nothing we need to worry about as we only try to delete
// the fd from the epoll set because we did not find it in our mappings. So this call to
// epollCtlDel(...) is just to ensure we clean up and so may fail if it was
// deleted before or the file descriptor was closed before.
}
}
}
}
}
/**
 * Closes the epoll, event and timer descriptors (in that order) and then releases the native memory held
 * by the {@link IovArray} and the event array. An {@link IOException} while closing one descriptor is
 * logged and does not prevent the remaining ones from being closed.
 */
@Override
protected void cleanup() {
    try {
        closeAndWarn(epollFd, "Failed to close the epoll fd.");
        closeAndWarn(eventFd, "Failed to close the event fd.");
        closeAndWarn(timerFd, "Failed to close the timer fd.");
    } finally {
        // release native memory
        iovArray.release();
        events.free();
    }
}

// Closes the descriptor and logs the given message at warn level if closing fails with an IOException.
private static void closeAndWarn(FileDescriptor fd, String failureMessage) {
    try {
        fd.close();
    } catch (IOException e) {
        logger.warn(failureMessage, e);
    }
}
/**
 * Rejects delays that, converted to whole days, exceed {@link #MAX_SCHEDULED_DAYS}.
 * A delay of exactly {@link #MAX_SCHEDULED_DAYS} days is accepted.
 *
 * @param amount the delay
 * @param unit   the unit of {@code amount}
 * @throws IllegalArgumentException if the delay is more than {@link #MAX_SCHEDULED_DAYS} days
 */
@Override
protected void validateScheduled(long amount, TimeUnit unit) {
    final long days = unit.toDays(amount);
    if (days > MAX_SCHEDULED_DAYS) {
        // The bound itself is allowed, so the message reports "<=" rather than "<".
        throw new IllegalArgumentException("days: " + days + " (expected: <= " + MAX_SCHEDULED_DAYS + ')');
    }
}
}