/*
 * Copyright 2014 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty.channel.epoll;

import io.netty.channel.EventLoop;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.SelectStrategy;
import io.netty.channel.SingleThreadEventLoop;
import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
import io.netty.channel.unix.FileDescriptor;
import io.netty.channel.unix.IovArray;
import io.netty.util.IntSupplier;
import io.netty.util.collection.IntObjectHashMap;
import io.netty.util.collection.IntObjectMap;
import io.netty.util.concurrent.RejectedExecutionHandler;
import io.netty.util.internal.ObjectUtil;
import io.netty.util.internal.PlatformDependent;
import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;

import static java.lang.Math.min;

EventLoop which uses epoll under the covers. Only works on Linux!
/** * {@link EventLoop} which uses epoll under the covers. Only works on Linux! */
final class EpollEventLoop extends SingleThreadEventLoop { private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class); private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER = AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp"); static { // Ensure JNI is initialized by the time this class is loaded by this time! // We use unix-common methods in this class which are backed by JNI methods. Epoll.ensureAvailability(); } private final FileDescriptor epollFd; private final FileDescriptor eventFd; private final FileDescriptor timerFd; private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096); private final boolean allowGrowing; private final EpollEventArray events; private final IovArray iovArray = new IovArray(); private final SelectStrategy selectStrategy; private final IntSupplier selectNowSupplier = new IntSupplier() { @Override public int get() throws Exception { return epollWaitNow(); } }; private final Callable<Integer> pendingTasksCallable = new Callable<Integer>() { @Override public Integer call() throws Exception { return EpollEventLoop.super.pendingTasks(); } }; private volatile int wakenUp; private volatile int ioRatio = 50; // See http://man7.org/linux/man-pages/man2/timerfd_create.2.html. static final long MAX_SCHEDULED_DAYS = TimeUnit.SECONDS.toDays(999999999); EpollEventLoop(EventLoopGroup parent, Executor executor, int maxEvents, SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) { super(parent, executor, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler); selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy"); if (maxEvents == 0) { allowGrowing = true; events = new EpollEventArray(4096); } else { allowGrowing = false; events = new EpollEventArray(maxEvents); } boolean success = false; FileDescriptor epollFd = null; FileDescriptor eventFd = null; FileDescriptor timerFd = null; try { this.epollFd = epollFd = Native.newEpollCreate(); this.eventFd = eventFd = Native.newEventFd(); try { Native.epollCtlAdd(epollFd.intValue(), eventFd.intValue(), Native.EPOLLIN); } catch (IOException e) { throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e); } this.timerFd = timerFd = Native.newTimerFd(); try { Native.epollCtlAdd(epollFd.intValue(), timerFd.intValue(), Native.EPOLLIN | Native.EPOLLET); } catch (IOException e) { throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e); } success = true; } finally { if (!success) { if (epollFd != null) { try { epollFd.close(); } catch (Exception e) { // ignore } } if (eventFd != null) { try { eventFd.close(); } catch (Exception e) { // ignore } } if (timerFd != null) { try { timerFd.close(); } catch (Exception e) { // ignore } } } } }
Return a cleared IovArray that can be used for writes in this EventLoop.
/** * Return a cleared {@link IovArray} that can be used for writes in this {@link EventLoop}. */
IovArray cleanArray() { iovArray.clear(); return iovArray; } @Override protected void wakeup(boolean inEventLoop) { if (!inEventLoop && WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) { // write to the evfd which will then wake-up epoll_wait(...) Native.eventFdWrite(eventFd.intValue(), 1L); } }
Register the given epoll with this EventLoop.
/** * Register the given epoll with this {@link EventLoop}. */
void add(AbstractEpollChannel ch) throws IOException { assert inEventLoop(); int fd = ch.socket.intValue(); Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags); channels.put(fd, ch); }
The flags of the given epoll was modified so update the registration
/** * The flags of the given epoll was modified so update the registration */
void modify(AbstractEpollChannel ch) throws IOException { assert inEventLoop(); Native.epollCtlMod(epollFd.intValue(), ch.socket.intValue(), ch.flags); }
Deregister the given epoll from this EventLoop.
/** * Deregister the given epoll from this {@link EventLoop}. */
void remove(AbstractEpollChannel ch) throws IOException { assert inEventLoop(); if (ch.isOpen()) { int fd = ch.socket.intValue(); if (channels.remove(fd) != null) { // Remove the epoll. This is only needed if it's still open as otherwise it will be automatically // removed once the file-descriptor is closed. Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue()); } } } @Override protected Queue<Runnable> newTaskQueue(int maxPendingTasks) { // This event loop never calls takeTask() return maxPendingTasks == Integer.MAX_VALUE ? PlatformDependent.<Runnable>newMpscQueue() : PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks); } @Override public int pendingTasks() { // As we use a MpscQueue we need to ensure pendingTasks() is only executed from within the EventLoop as // otherwise we may see unexpected behavior (as size() is only allowed to be called by a single consumer). // See https://github.com/netty/netty/issues/5297 if (inEventLoop()) { return super.pendingTasks(); } else { return submit(pendingTasksCallable).syncUninterruptibly().getNow(); } }
Returns the percentage of the desired amount of time spent for I/O in the event loop.
/** * Returns the percentage of the desired amount of time spent for I/O in the event loop. */
public int getIoRatio() { return ioRatio; }
Sets the percentage of the desired amount of time spent for I/O in the event loop. The default value is 50, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
/** * Sets the percentage of the desired amount of time spent for I/O in the event loop. The default value is * {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks. */
public void setIoRatio(int ioRatio) { if (ioRatio <= 0 || ioRatio > 100) { throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)"); } this.ioRatio = ioRatio; } private int epollWait(boolean oldWakeup) throws IOException { // If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event. // So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended // until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed // in pipeline. if (oldWakeup && hasTasks()) { return epollWaitNow(); } long totalDelay = delayNanos(System.nanoTime()); int delaySeconds = (int) min(totalDelay / 1000000000L, Integer.MAX_VALUE); return Native.epollWait(epollFd, events, timerFd, delaySeconds, (int) min(totalDelay - delaySeconds * 1000000000L, Integer.MAX_VALUE)); } private int epollWaitNow() throws IOException { return Native.epollWait(epollFd, events, timerFd, 0, 0); } @Override protected void run() { for (;;) { try { int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks()); switch (strategy) { case SelectStrategy.CONTINUE: continue; case SelectStrategy.SELECT: strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1); // 'wakenUp.compareAndSet(false, true)' is always evaluated // before calling 'selector.wakeup()' to reduce the wake-up // overhead. (Selector.wakeup() is an expensive operation.) // // However, there is a race condition in this approach. // The race condition is triggered when 'wakenUp' is set to // true too early. // // 'wakenUp' is set to true too early if: // 1) Selector is waken up between 'wakenUp.set(false)' and // 'selector.select(...)'. (BAD) // 2) Selector is waken up between 'selector.select(...)' and // 'if (wakenUp.get()) { ... }'. (OK) // // In the first case, 'wakenUp' is set to true and the // following 'selector.select(...)' will wake up immediately. // Until 'wakenUp' is set to false again in the next round, // 'wakenUp.compareAndSet(false, true)' will fail, and therefore // any attempt to wake up the Selector will fail, too, causing // the following 'selector.select(...)' call to block // unnecessarily. // // To fix this problem, we wake up the selector again if wakenUp // is true immediately after selector.select(...). // It is inefficient in that it wakes up the selector for both // the first case (BAD - wake-up required) and the second case // (OK - no wake-up required). if (wakenUp == 1) { Native.eventFdWrite(eventFd.intValue(), 1L); } // fallthrough default: } final int ioRatio = this.ioRatio; if (ioRatio == 100) { try { if (strategy > 0) { processReady(events, strategy); } } finally { // Ensure we always run tasks. runAllTasks(); } } else { final long ioStartTime = System.nanoTime(); try { if (strategy > 0) { processReady(events, strategy); } } finally { // Ensure we always run tasks. final long ioTime = System.nanoTime() - ioStartTime; runAllTasks(ioTime * (100 - ioRatio) / ioRatio); } } if (allowGrowing && strategy == events.length()) { //increase the size of the array as we needed the whole space for the events events.increase(); } } catch (Throwable t) { handleLoopException(t); } // Always handle shutdown even if the loop processing threw an exception. try { if (isShuttingDown()) { closeAll(); if (confirmShutdown()) { break; } } } catch (Throwable t) { handleLoopException(t); } } } private static void handleLoopException(Throwable t) { logger.warn("Unexpected exception in the selector loop.", t); // Prevent possible consecutive immediate failures that lead to // excessive CPU consumption. try { Thread.sleep(1000); } catch (InterruptedException e) { // Ignore. } } private void closeAll() { try { epollWaitNow(); } catch (IOException ignore) { // ignore on close } // Using the intermediate collection to prevent ConcurrentModificationException. // In the `close()` method, the channel is deleted from `channels` map. Collection<AbstractEpollChannel> array = new ArrayList<AbstractEpollChannel>(channels.size()); for (AbstractEpollChannel channel: channels.values()) { array.add(channel); } for (AbstractEpollChannel ch: array) { ch.unsafe().close(ch.unsafe().voidPromise()); } } private void processReady(EpollEventArray events, int ready) { for (int i = 0; i < ready; i ++) { final int fd = events.fd(i); if (fd == eventFd.intValue()) { // consume wakeup event. Native.eventFdRead(fd); } else if (fd == timerFd.intValue()) { // consume wakeup event, necessary because the timer is added with ET mode. Native.timerFdRead(fd); } else { final long ev = events.events(i); AbstractEpollChannel ch = channels.get(fd); if (ch != null) { // Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100% // sure about it! // Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the // past. AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe(); // First check for EPOLLOUT as we may need to fail the connect ChannelPromise before try // to read from the file descriptor. // See https://github.com/netty/netty/issues/3785 // // It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused. // In either case epollOutReady() will do the correct thing (finish connecting, or fail // the connection). // See https://github.com/netty/netty/issues/3848 if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) { // Force flush of data as the epoll is writable again unsafe.epollOutReady(); } // Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input. // See https://github.com/netty/netty/issues/4317. // // If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will // try to read from the underlying file descriptor and so notify the user about the error. if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) { // The Channel is still open and there is something to read. Do it now. unsafe.epollInReady(); } // Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case // we may close the channel directly or try to read more data depending on the state of the // Channel and als depending on the AbstractEpollChannel subtype. if ((ev & Native.EPOLLRDHUP) != 0) { unsafe.epollRdHupReady(); } } else { // We received an event for an fd which we not use anymore. Remove it from the epoll_event set. try { Native.epollCtlDel(epollFd.intValue(), fd); } catch (IOException ignore) { // This can happen but is nothing we need to worry about as we only try to delete // the fd from the epoll set as we not found it in our mappings. So this call to // epollCtlDel(...) is just to ensure we cleanup stuff and so may fail if it was // deleted before or the file descriptor was closed before. } } } } } @Override protected void cleanup() { try { try { epollFd.close(); } catch (IOException e) { logger.warn("Failed to close the epoll fd.", e); } try { eventFd.close(); } catch (IOException e) { logger.warn("Failed to close the event fd.", e); } try { timerFd.close(); } catch (IOException e) { logger.warn("Failed to close the timer fd.", e); } } finally { // release native memory iovArray.release(); events.free(); } } @Override protected void validateScheduled(long amount, TimeUnit unit) { long days = unit.toDays(amount); if (days > MAX_SCHEDULED_DAYS) { throw new IllegalArgumentException("days: " + days + " (expected: < " + MAX_SCHEDULED_DAYS + ')'); } } }