/*
 * Copyright 2014 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
16  package io.netty.channel.epoll;
17  
18  import io.netty.channel.EventLoop;
19  import io.netty.channel.EventLoopGroup;
20  import io.netty.channel.SelectStrategy;
21  import io.netty.channel.SingleThreadEventLoop;
22  import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
23  import io.netty.channel.unix.FileDescriptor;
24  import io.netty.channel.unix.IovArray;
25  import io.netty.util.IntSupplier;
26  import io.netty.util.collection.IntObjectHashMap;
27  import io.netty.util.collection.IntObjectMap;
28  import io.netty.util.concurrent.RejectedExecutionHandler;
29  import io.netty.util.internal.ObjectUtil;
30  import io.netty.util.internal.PlatformDependent;
31  import io.netty.util.internal.logging.InternalLogger;
32  import io.netty.util.internal.logging.InternalLoggerFactory;
33  
34  import java.io.IOException;
35  import java.util.ArrayList;
36  import java.util.Collection;
37  import java.util.Queue;
38  import java.util.concurrent.Callable;
39  import java.util.concurrent.Executor;
40  import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
41  
42  import static java.lang.Math.min;
43  
44  /**
45   * {@link EventLoop} which uses epoll under the covers. Only works on Linux!
46   */
47  final class EpollEventLoop extends SingleThreadEventLoop {
48      private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);
49      private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
50              AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");
51  
52      static {
53          // Ensure JNI is initialized by the time this class is loaded by this time!
54          // We use unix-common methods in this class which are backed by JNI methods.
55          Epoll.ensureAvailability();
56      }
57  
58      private final FileDescriptor epollFd;
59      private final FileDescriptor eventFd;
60      private final FileDescriptor timerFd;
61      private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096);
62      private final boolean allowGrowing;
63      private final EpollEventArray events;
64      private final IovArray iovArray = new IovArray();
65      private final SelectStrategy selectStrategy;
66      private final IntSupplier selectNowSupplier = new IntSupplier() {
67          @Override
68          public int get() throws Exception {
69              return epollWaitNow();
70          }
71      };
72      private final Callable<Integer> pendingTasksCallable = new Callable<Integer>() {
73          @Override
74          public Integer call() throws Exception {
75              return EpollEventLoop.super.pendingTasks();
76          }
77      };
78      private volatile int wakenUp;
79      private volatile int ioRatio = 50;
80  
81      EpollEventLoop(EventLoopGroup parent, Executor executor, int maxEvents,
82                     SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) {
83          super(parent, executor, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler);
84          selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy");
85          if (maxEvents == 0) {
86              allowGrowing = true;
87              events = new EpollEventArray(4096);
88          } else {
89              allowGrowing = false;
90              events = new EpollEventArray(maxEvents);
91          }
92          boolean success = false;
93          FileDescriptor epollFd = null;
94          FileDescriptor eventFd = null;
95          FileDescriptor timerFd = null;
96          try {
97              this.epollFd = epollFd = Native.newEpollCreate();
98              this.eventFd = eventFd = Native.newEventFd();
99              try {
100                 Native.epollCtlAdd(epollFd.intValue(), eventFd.intValue(), Native.EPOLLIN);
101             } catch (IOException e) {
102                 throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e);
103             }
104             this.timerFd = timerFd = Native.newTimerFd();
105             try {
106                 Native.epollCtlAdd(epollFd.intValue(), timerFd.intValue(), Native.EPOLLIN | Native.EPOLLET);
107             } catch (IOException e) {
108                 throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e);
109             }
110             success = true;
111         } finally {
112             if (!success) {
113                 if (epollFd != null) {
114                     try {
115                         epollFd.close();
116                     } catch (Exception e) {
117                         // ignore
118                     }
119                 }
120                 if (eventFd != null) {
121                     try {
122                         eventFd.close();
123                     } catch (Exception e) {
124                         // ignore
125                     }
126                 }
127                 if (timerFd != null) {
128                     try {
129                         timerFd.close();
130                     } catch (Exception e) {
131                         // ignore
132                     }
133                 }
134             }
135         }
136     }
137 
138     /**
139      * Return a cleared {@link IovArray} that can be used for writes in this {@link EventLoop}.
140      */
141     IovArray cleanArray() {
142         iovArray.clear();
143         return iovArray;
144     }
145 
146     @Override
147     protected void wakeup(boolean inEventLoop) {
148         if (!inEventLoop && WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) {
149             // write to the evfd which will then wake-up epoll_wait(...)
150             Native.eventFdWrite(eventFd.intValue(), 1L);
151         }
152     }
153 
154     /**
155      * Register the given epoll with this {@link EventLoop}.
156      */
157     void add(AbstractEpollChannel ch) throws IOException {
158         assert inEventLoop();
159         int fd = ch.socket.intValue();
160         Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags);
161         channels.put(fd, ch);
162     }
163 
164     /**
165      * The flags of the given epoll was modified so update the registration
166      */
167     void modify(AbstractEpollChannel ch) throws IOException {
168         assert inEventLoop();
169         Native.epollCtlMod(epollFd.intValue(), ch.socket.intValue(), ch.flags);
170     }
171 
172     /**
173      * Deregister the given epoll from this {@link EventLoop}.
174      */
175     void remove(AbstractEpollChannel ch) throws IOException {
176         assert inEventLoop();
177 
178         if (ch.isOpen()) {
179             int fd = ch.socket.intValue();
180             if (channels.remove(fd) != null) {
181                 // Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
182                 // removed once the file-descriptor is closed.
183                 Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue());
184             }
185         }
186     }
187 
188     @Override
189     protected Queue<Runnable> newTaskQueue(int maxPendingTasks) {
190         // This event loop never calls takeTask()
191         return maxPendingTasks == Integer.MAX_VALUE ? PlatformDependent.<Runnable>newMpscQueue()
192                                                     : PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks);
193     }
194 
195     @Override
196     public int pendingTasks() {
197         // As we use a MpscQueue we need to ensure pendingTasks() is only executed from within the EventLoop as
198         // otherwise we may see unexpected behavior (as size() is only allowed to be called by a single consumer).
199         // See https://github.com/netty/netty/issues/5297
200         if (inEventLoop()) {
201             return super.pendingTasks();
202         } else {
203             return submit(pendingTasksCallable).syncUninterruptibly().getNow();
204         }
205     }
206     /**
207      * Returns the percentage of the desired amount of time spent for I/O in the event loop.
208      */
209     public int getIoRatio() {
210         return ioRatio;
211     }
212 
213     /**
214      * Sets the percentage of the desired amount of time spent for I/O in the event loop.  The default value is
215      * {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
216      */
217     public void setIoRatio(int ioRatio) {
218         if (ioRatio <= 0 || ioRatio > 100) {
219             throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)");
220         }
221         this.ioRatio = ioRatio;
222     }
223 
224     private int epollWait(boolean oldWakeup) throws IOException {
225         // If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event.
226         // So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended
227         // until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed
228         // in pipeline.
229         if (oldWakeup && hasTasks()) {
230             return epollWaitNow();
231         }
232 
233         long totalDelay = delayNanos(System.nanoTime());
234         int delaySeconds = (int) min(totalDelay / 1000000000L, Integer.MAX_VALUE);
235         return Native.epollWait(epollFd, events, timerFd, delaySeconds,
236                 (int) min(totalDelay - delaySeconds * 1000000000L, Integer.MAX_VALUE));
237     }
238 
239     private int epollWaitNow() throws IOException {
240         return Native.epollWait(epollFd, events, timerFd, 0, 0);
241     }
242 
243     @Override
244     protected void run() {
245         for (;;) {
246             try {
247                 int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks());
248                 switch (strategy) {
249                     case SelectStrategy.CONTINUE:
250                         continue;
251                     case SelectStrategy.SELECT:
252                         strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1);
253 
254                         // 'wakenUp.compareAndSet(false, true)' is always evaluated
255                         // before calling 'selector.wakeup()' to reduce the wake-up
256                         // overhead. (Selector.wakeup() is an expensive operation.)
257                         //
258                         // However, there is a race condition in this approach.
259                         // The race condition is triggered when 'wakenUp' is set to
260                         // true too early.
261                         //
262                         // 'wakenUp' is set to true too early if:
263                         // 1) Selector is waken up between 'wakenUp.set(false)' and
264                         //    'selector.select(...)'. (BAD)
265                         // 2) Selector is waken up between 'selector.select(...)' and
266                         //    'if (wakenUp.get()) { ... }'. (OK)
267                         //
268                         // In the first case, 'wakenUp' is set to true and the
269                         // following 'selector.select(...)' will wake up immediately.
270                         // Until 'wakenUp' is set to false again in the next round,
271                         // 'wakenUp.compareAndSet(false, true)' will fail, and therefore
272                         // any attempt to wake up the Selector will fail, too, causing
273                         // the following 'selector.select(...)' call to block
274                         // unnecessarily.
275                         //
276                         // To fix this problem, we wake up the selector again if wakenUp
277                         // is true immediately after selector.select(...).
278                         // It is inefficient in that it wakes up the selector for both
279                         // the first case (BAD - wake-up required) and the second case
280                         // (OK - no wake-up required).
281 
282                         if (wakenUp == 1) {
283                             Native.eventFdWrite(eventFd.intValue(), 1L);
284                         }
285                         // fallthrough
286                     default:
287                 }
288 
289                 final int ioRatio = this.ioRatio;
290                 if (ioRatio == 100) {
291                     try {
292                         if (strategy > 0) {
293                             processReady(events, strategy);
294                         }
295                     } finally {
296                         // Ensure we always run tasks.
297                         runAllTasks();
298                     }
299                 } else {
300                     final long ioStartTime = System.nanoTime();
301 
302                     try {
303                         if (strategy > 0) {
304                             processReady(events, strategy);
305                         }
306                     } finally {
307                         // Ensure we always run tasks.
308                         final long ioTime = System.nanoTime() - ioStartTime;
309                         runAllTasks(ioTime * (100 - ioRatio) / ioRatio);
310                     }
311                 }
312                 if (allowGrowing && strategy == events.length()) {
313                     //increase the size of the array as we needed the whole space for the events
314                     events.increase();
315                 }
316             } catch (Throwable t) {
317                 handleLoopException(t);
318             }
319             // Always handle shutdown even if the loop processing threw an exception.
320             try {
321                 if (isShuttingDown()) {
322                     closeAll();
323                     if (confirmShutdown()) {
324                         break;
325                     }
326                 }
327             } catch (Throwable t) {
328                 handleLoopException(t);
329             }
330         }
331     }
332 
333     private static void handleLoopException(Throwable t) {
334         logger.warn("Unexpected exception in the selector loop.", t);
335 
336         // Prevent possible consecutive immediate failures that lead to
337         // excessive CPU consumption.
338         try {
339             Thread.sleep(1000);
340         } catch (InterruptedException e) {
341             // Ignore.
342         }
343     }
344 
345     private void closeAll() {
346         try {
347             epollWaitNow();
348         } catch (IOException ignore) {
349             // ignore on close
350         }
351         // Using the intermediate collection to prevent ConcurrentModificationException.
352         // In the `close()` method, the channel is deleted from `channels` map.
353         Collection<AbstractEpollChannel> array = new ArrayList<AbstractEpollChannel>(channels.size());
354 
355         for (AbstractEpollChannel channel: channels.values()) {
356             array.add(channel);
357         }
358 
359         for (AbstractEpollChannel ch: array) {
360             ch.unsafe().close(ch.unsafe().voidPromise());
361         }
362     }
363 
364     private void processReady(EpollEventArray events, int ready) {
365         for (int i = 0; i < ready; i ++) {
366             final int fd = events.fd(i);
367             if (fd == eventFd.intValue()) {
368                 // consume wakeup event.
369                 Native.eventFdRead(fd);
370             } else if (fd == timerFd.intValue()) {
371                 // consume wakeup event, necessary because the timer is added with ET mode.
372                 Native.timerFdRead(fd);
373             } else {
374                 final long ev = events.events(i);
375 
376                 AbstractEpollChannel ch = channels.get(fd);
377                 if (ch != null) {
378                     // Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100%
379                     // sure about it!
380                     // Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the
381                     // past.
382                     AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe();
383 
384                     // First check for EPOLLOUT as we may need to fail the connect ChannelPromise before try
385                     // to read from the file descriptor.
386                     // See https://github.com/netty/netty/issues/3785
387                     //
388                     // It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused.
389                     // In either case epollOutReady() will do the correct thing (finish connecting, or fail
390                     // the connection).
391                     // See https://github.com/netty/netty/issues/3848
392                     if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) {
393                         // Force flush of data as the epoll is writable again
394                         unsafe.epollOutReady();
395                     }
396 
397                     // Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input.
398                     // See https://github.com/netty/netty/issues/4317.
399                     //
400                     // If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will
401                     // try to read from the underlying file descriptor and so notify the user about the error.
402                     if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) {
403                         // The Channel is still open and there is something to read. Do it now.
404                         unsafe.epollInReady();
405                     }
406 
407                     // Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case
408                     // we may close the channel directly or try to read more data depending on the state of the
409                     // Channel and als depending on the AbstractEpollChannel subtype.
410                     if ((ev & Native.EPOLLRDHUP) != 0) {
411                         unsafe.epollRdHupReady();
412                     }
413                 } else {
414                     // We received an event for an fd which we not use anymore. Remove it from the epoll_event set.
415                     try {
416                         Native.epollCtlDel(epollFd.intValue(), fd);
417                     } catch (IOException ignore) {
418                         // This can happen but is nothing we need to worry about as we only try to delete
419                         // the fd from the epoll set as we not found it in our mappings. So this call to
420                         // epollCtlDel(...) is just to ensure we cleanup stuff and so may fail if it was
421                         // deleted before or the file descriptor was closed before.
422                     }
423                 }
424             }
425         }
426     }
427 
428     @Override
429     protected void cleanup() {
430         try {
431             try {
432                 epollFd.close();
433             } catch (IOException e) {
434                 logger.warn("Failed to close the epoll fd.", e);
435             }
436             try {
437                 eventFd.close();
438             } catch (IOException e) {
439                 logger.warn("Failed to close the event fd.", e);
440             }
441             try {
442                 timerFd.close();
443             } catch (IOException e) {
444                 logger.warn("Failed to close the timer fd.", e);
445             }
446         } finally {
447             // release native memory
448             iovArray.release();
449             events.free();
450         }
451     }
452 }