View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.channel.epoll;
17  
18  import io.netty.channel.EventLoop;
19  import io.netty.channel.EventLoopGroup;
20  import io.netty.channel.SelectStrategy;
21  import io.netty.channel.SingleThreadEventLoop;
22  import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
23  import io.netty.channel.unix.FileDescriptor;
24  import io.netty.util.IntSupplier;
25  import io.netty.util.collection.IntObjectHashMap;
26  import io.netty.util.collection.IntObjectMap;
27  import io.netty.util.concurrent.RejectedExecutionHandler;
28  import io.netty.util.internal.ObjectUtil;
29  import io.netty.util.internal.PlatformDependent;
30  import io.netty.util.internal.logging.InternalLogger;
31  import io.netty.util.internal.logging.InternalLoggerFactory;
32  
33  import java.io.IOException;
34  import java.util.ArrayList;
35  import java.util.Collection;
36  import java.util.Queue;
37  import java.util.concurrent.ThreadFactory;
38  import java.util.concurrent.Callable;
39  import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
40  
41  import static java.lang.Math.min;
42  
43  /**
44   * {@link EventLoop} which uses epoll under the covers. Only works on Linux!
45   */
46  final class EpollEventLoop extends SingleThreadEventLoop {
47      private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);
48      private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
49              AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");
50  
51      private final FileDescriptor epollFd;
52      private final FileDescriptor eventFd;
53      private final FileDescriptor timerFd;
54      private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096);
55      private final boolean allowGrowing;
56      private final EpollEventArray events;
57      private final IovArray iovArray = new IovArray();
58      private final SelectStrategy selectStrategy;
59      private final IntSupplier selectNowSupplier = new IntSupplier() {
60          @Override
61          public int get() throws Exception {
62              return epollWaitNow();
63          }
64      };
65  
66      private final Callable<Integer> pendingTasksCallable = new Callable<Integer>() {
67          @Override
68          public Integer call() throws Exception {
69              return EpollEventLoop.super.pendingTasks();
70          }
71      };
72  
73      @SuppressWarnings("unused")
74      private volatile int wakenUp;
75      private volatile int ioRatio = 50;
76  
77      EpollEventLoop(EventLoopGroup parent, ThreadFactory threadFactory, int maxEvents,
78                     SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) {
79          super(parent, threadFactory, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler);
80          selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy");
81          if (maxEvents == 0) {
82              allowGrowing = true;
83              events = new EpollEventArray(4096);
84          } else {
85              allowGrowing = false;
86              events = new EpollEventArray(maxEvents);
87          }
88          boolean success = false;
89          FileDescriptor epollFd = null;
90          FileDescriptor eventFd = null;
91          FileDescriptor timerFd = null;
92          try {
93              this.epollFd = epollFd = Native.newEpollCreate();
94              this.eventFd = eventFd = Native.newEventFd();
95              try {
96                  Native.epollCtlAdd(epollFd.intValue(), eventFd.intValue(), Native.EPOLLIN);
97              } catch (IOException e) {
98                  throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e);
99              }
100             this.timerFd = timerFd = Native.newTimerFd();
101             try {
102                 Native.epollCtlAdd(epollFd.intValue(), timerFd.intValue(), Native.EPOLLIN | Native.EPOLLET);
103             } catch (IOException e) {
104                 throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e);
105             }
106             success = true;
107         } finally {
108             if (!success) {
109                 if (epollFd != null) {
110                     try {
111                         epollFd.close();
112                     } catch (Exception e) {
113                         // ignore
114                     }
115                 }
116                 if (eventFd != null) {
117                     try {
118                         eventFd.close();
119                     } catch (Exception e) {
120                         // ignore
121                     }
122                 }
123                 if (timerFd != null) {
124                     try {
125                         timerFd.close();
126                     } catch (Exception e) {
127                         // ignore
128                     }
129                 }
130             }
131         }
132     }
133 
134     /**
135      * Return a cleared {@link IovArray} that can be used for writes in this {@link EventLoop}.
136      */
137     IovArray cleanArray() {
138         iovArray.clear();
139         return iovArray;
140     }
141 
142     @Override
143     protected void wakeup(boolean inEventLoop) {
144         if (!inEventLoop && WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) {
145             // write to the evfd which will then wake-up epoll_wait(...)
146             Native.eventFdWrite(eventFd.intValue(), 1L);
147         }
148     }
149 
150     /**
151      * Register the given epoll with this {@link EventLoop}.
152      */
153     void add(AbstractEpollChannel ch) throws IOException {
154         assert inEventLoop();
155         int fd = ch.fd().intValue();
156         Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags);
157         channels.put(fd, ch);
158     }
159 
160     /**
161      * The flags of the given epoll was modified so update the registration
162      */
163     void modify(AbstractEpollChannel ch) throws IOException {
164         assert inEventLoop();
165         Native.epollCtlMod(epollFd.intValue(), ch.fd().intValue(), ch.flags);
166     }
167 
168     /**
169      * Deregister the given epoll from this {@link EventLoop}.
170      */
171     void remove(AbstractEpollChannel ch) throws IOException {
172         assert inEventLoop();
173 
174         if (ch.isOpen()) {
175             int fd = ch.fd().intValue();
176             if (channels.remove(fd) != null) {
177                 // Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
178                 // removed once the file-descriptor is closed.
179                 Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue());
180             }
181         }
182     }
183 
184     @Override
185     protected Queue<Runnable> newTaskQueue(int maxPendingTasks) {
186         // This event loop never calls takeTask()
187         return maxPendingTasks == Integer.MAX_VALUE ? PlatformDependent.<Runnable>newMpscQueue()
188                                                     : PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks);
189     }
190 
191     @Override
192     public int pendingTasks() {
193         // As we use a MpscQueue we need to ensure pendingTasks() is only executed from within the EventLoop as
194         // otherwise we may see unexpected behavior (as size() is only allowed to be called by a single consumer).
195         // See https://github.com/netty/netty/issues/5297
196         if (inEventLoop()) {
197             return super.pendingTasks();
198         } else {
199             return submit(pendingTasksCallable).syncUninterruptibly().getNow();
200         }
201     }
202     /**
203      * Returns the percentage of the desired amount of time spent for I/O in the event loop.
204      */
205     public int getIoRatio() {
206         return ioRatio;
207     }
208 
209     /**
210      * Sets the percentage of the desired amount of time spent for I/O in the event loop.  The default value is
211      * {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
212      */
213     public void setIoRatio(int ioRatio) {
214         if (ioRatio <= 0 || ioRatio > 100) {
215             throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)");
216         }
217         this.ioRatio = ioRatio;
218     }
219 
220     private int epollWait(boolean oldWakeup) throws IOException {
221         // If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event.
222         // So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended
223         // until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed
224         // in pipeline.
225         if (oldWakeup && hasTasks()) {
226             return epollWaitNow();
227         }
228 
229         long totalDelay = delayNanos(System.nanoTime());
230         int delaySeconds = (int) min(totalDelay / 1000000000L, Integer.MAX_VALUE);
231         return Native.epollWait(epollFd, events, timerFd, delaySeconds,
232                 (int) min(totalDelay - delaySeconds * 1000000000L, Integer.MAX_VALUE));
233     }
234 
235     private int epollWaitNow() throws IOException {
236         return Native.epollWait(epollFd, events, timerFd, 0, 0);
237     }
238 
239     @Override
240     protected void run() {
241         for (;;) {
242             try {
243                 int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks());
244                 switch (strategy) {
245                     case SelectStrategy.CONTINUE:
246                         continue;
247                     case SelectStrategy.SELECT:
248                         strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1);
249 
250                         // 'wakenUp.compareAndSet(false, true)' is always evaluated
251                         // before calling 'selector.wakeup()' to reduce the wake-up
252                         // overhead. (Selector.wakeup() is an expensive operation.)
253                         //
254                         // However, there is a race condition in this approach.
255                         // The race condition is triggered when 'wakenUp' is set to
256                         // true too early.
257                         //
258                         // 'wakenUp' is set to true too early if:
259                         // 1) Selector is waken up between 'wakenUp.set(false)' and
260                         //    'selector.select(...)'. (BAD)
261                         // 2) Selector is waken up between 'selector.select(...)' and
262                         //    'if (wakenUp.get()) { ... }'. (OK)
263                         //
264                         // In the first case, 'wakenUp' is set to true and the
265                         // following 'selector.select(...)' will wake up immediately.
266                         // Until 'wakenUp' is set to false again in the next round,
267                         // 'wakenUp.compareAndSet(false, true)' will fail, and therefore
268                         // any attempt to wake up the Selector will fail, too, causing
269                         // the following 'selector.select(...)' call to block
270                         // unnecessarily.
271                         //
272                         // To fix this problem, we wake up the selector again if wakenUp
273                         // is true immediately after selector.select(...).
274                         // It is inefficient in that it wakes up the selector for both
275                         // the first case (BAD - wake-up required) and the second case
276                         // (OK - no wake-up required).
277 
278                         if (wakenUp == 1) {
279                             Native.eventFdWrite(eventFd.intValue(), 1L);
280                         }
281                         // fallthrough
282                     default:
283                 }
284 
285                 final int ioRatio = this.ioRatio;
286                 if (ioRatio == 100) {
287                     try {
288                         if (strategy > 0) {
289                             processReady(events, strategy);
290                         }
291                     } finally {
292                         // Ensure we always run tasks.
293                         runAllTasks();
294                     }
295                 } else {
296                     final long ioStartTime = System.nanoTime();
297 
298                     try {
299                         if (strategy > 0) {
300                             processReady(events, strategy);
301                         }
302                     } finally {
303                         // Ensure we always run tasks.
304                         final long ioTime = System.nanoTime() - ioStartTime;
305                         runAllTasks(ioTime * (100 - ioRatio) / ioRatio);
306                     }
307                 }
308                 if (allowGrowing && strategy == events.length()) {
309                     //increase the size of the array as we needed the whole space for the events
310                     events.increase();
311                 }
312             } catch (Throwable t) {
313                 handleLoopException(t);
314             }
315             // Always handle shutdown even if the loop processing threw an exception.
316             try {
317                 if (isShuttingDown()) {
318                     closeAll();
319                     if (confirmShutdown()) {
320                         break;
321                     }
322                 }
323             } catch (Throwable t) {
324                 handleLoopException(t);
325             }
326         }
327     }
328 
329     private static void handleLoopException(Throwable t) {
330         logger.warn("Unexpected exception in the selector loop.", t);
331 
332         // Prevent possible consecutive immediate failures that lead to
333         // excessive CPU consumption.
334         try {
335             Thread.sleep(1000);
336         } catch (InterruptedException e) {
337             // Ignore.
338         }
339     }
340 
341     private void closeAll() {
342         try {
343             epollWaitNow();
344         } catch (IOException ignore) {
345             // ignore on close
346         }
347         // Using the intermediate collection to prevent ConcurrentModificationException.
348         // In the `close()` method, the channel is deleted from `channels` map.
349         Collection<AbstractEpollChannel> array = new ArrayList<AbstractEpollChannel>(channels.size());
350 
351         for (IntObjectMap.Entry<AbstractEpollChannel> entry: channels.entries()) {
352             array.add(entry.value());
353         }
354 
355         for (AbstractEpollChannel ch: array) {
356             ch.unsafe().close(ch.unsafe().voidPromise());
357         }
358     }
359 
360     private void processReady(EpollEventArray events, int ready) {
361         for (int i = 0; i < ready; i ++) {
362             final int fd = events.fd(i);
363             if (fd == eventFd.intValue()) {
364                 // consume wakeup event.
365                 Native.eventFdRead(fd);
366             } else if (fd == timerFd.intValue()) {
367                 // consume wakeup event, necessary because the timer is added with ET mode.
368                 Native.timerFdRead(fd);
369             } else {
370                 final long ev = events.events(i);
371 
372                 AbstractEpollChannel ch = channels.get(fd);
373                 if (ch != null) {
374                     // Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100%
375                     // sure about it!
376                     // Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the
377                     // past.
378                     AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe();
379 
380                     // First check for EPOLLOUT as we may need to fail the connect ChannelPromise before try
381                     // to read from the file descriptor.
382                     // See https://github.com/netty/netty/issues/3785
383                     //
384                     // It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused.
385                     // In either case epollOutReady() will do the correct thing (finish connecting, or fail
386                     // the connection).
387                     // See https://github.com/netty/netty/issues/3848
388                     if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) {
389                         // Force flush of data as the epoll is writable again
390                         unsafe.epollOutReady();
391                     }
392 
393                     // Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input.
394                     // See https://github.com/netty/netty/issues/4317.
395                     //
396                     // If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will
397                     // try to read from the underlying file descriptor and so notify the user about the error.
398                     if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) {
399                         // The Channel is still open and there is something to read. Do it now.
400                         unsafe.epollInReady();
401                     }
402 
403                     // Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case
404                     // we may close the channel directly or try to read more data depending on the state of the
405                     // Channel and als depending on the AbstractEpollChannel subtype.
406                     if ((ev & Native.EPOLLRDHUP) != 0) {
407                         unsafe.epollRdHupReady();
408                     }
409                 } else {
410                     // We received an event for an fd which we not use anymore. Remove it from the epoll_event set.
411                     try {
412                         Native.epollCtlDel(epollFd.intValue(), fd);
413                     } catch (IOException ignore) {
414                         // This can happen but is nothing we need to worry about as we only try to delete
415                         // the fd from the epoll set as we not found it in our mappings. So this call to
416                         // epollCtlDel(...) is just to ensure we cleanup stuff and so may fail if it was
417                         // deleted before or the file descriptor was closed before.
418                     }
419                 }
420             }
421         }
422     }
423 
424     @Override
425     protected void cleanup() {
426         try {
427             try {
428                 epollFd.close();
429             } catch (IOException e) {
430                 logger.warn("Failed to close the epoll fd.", e);
431             }
432             try {
433                 eventFd.close();
434             } catch (IOException e) {
435                 logger.warn("Failed to close the event fd.", e);
436             }
437             try {
438                 timerFd.close();
439             } catch (IOException e) {
440                 logger.warn("Failed to close the timer fd.", e);
441             }
442         } finally {
443             // release native memory
444             iovArray.release();
445             events.free();
446         }
447     }
448 }