1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.channel.epoll;
17
18 import io.netty.channel.EventLoop;
19 import io.netty.channel.EventLoopGroup;
20 import io.netty.channel.SelectStrategy;
21 import io.netty.channel.SingleThreadEventLoop;
22 import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
23 import io.netty.channel.unix.FileDescriptor;
24 import io.netty.util.IntSupplier;
25 import io.netty.util.collection.IntObjectHashMap;
26 import io.netty.util.collection.IntObjectMap;
27 import io.netty.util.concurrent.RejectedExecutionHandler;
28 import io.netty.util.internal.ObjectUtil;
29 import io.netty.util.internal.PlatformDependent;
30 import io.netty.util.internal.logging.InternalLogger;
31 import io.netty.util.internal.logging.InternalLoggerFactory;
32
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Collection;
36 import java.util.Queue;
37 import java.util.concurrent.ThreadFactory;
38 import java.util.concurrent.Callable;
39 import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
40
41 import static java.lang.Math.min;
42
43 /**
44 * {@link EventLoop} which uses epoll under the covers. Only works on Linux!
45 */
46 final class EpollEventLoop extends SingleThreadEventLoop {
47 private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);
48 private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
49 AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");
50
51 private final FileDescriptor epollFd;
52 private final FileDescriptor eventFd;
53 private final FileDescriptor timerFd;
54 private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096);
55 private final boolean allowGrowing;
56 private final EpollEventArray events;
57 private final IovArray iovArray = new IovArray();
58 private final SelectStrategy selectStrategy;
59 private final IntSupplier selectNowSupplier = new IntSupplier() {
60 @Override
61 public int get() throws Exception {
62 return epollWaitNow();
63 }
64 };
65
66 private final Callable<Integer> pendingTasksCallable = new Callable<Integer>() {
67 @Override
68 public Integer call() throws Exception {
69 return EpollEventLoop.super.pendingTasks();
70 }
71 };
72
73 @SuppressWarnings("unused")
74 private volatile int wakenUp;
75 private volatile int ioRatio = 50;
76
77 EpollEventLoop(EventLoopGroup parent, ThreadFactory threadFactory, int maxEvents,
78 SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) {
79 super(parent, threadFactory, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler);
80 selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy");
81 if (maxEvents == 0) {
82 allowGrowing = true;
83 events = new EpollEventArray(4096);
84 } else {
85 allowGrowing = false;
86 events = new EpollEventArray(maxEvents);
87 }
88 boolean success = false;
89 FileDescriptor epollFd = null;
90 FileDescriptor eventFd = null;
91 FileDescriptor timerFd = null;
92 try {
93 this.epollFd = epollFd = Native.newEpollCreate();
94 this.eventFd = eventFd = Native.newEventFd();
95 try {
96 Native.epollCtlAdd(epollFd.intValue(), eventFd.intValue(), Native.EPOLLIN);
97 } catch (IOException e) {
98 throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e);
99 }
100 this.timerFd = timerFd = Native.newTimerFd();
101 try {
102 Native.epollCtlAdd(epollFd.intValue(), timerFd.intValue(), Native.EPOLLIN | Native.EPOLLET);
103 } catch (IOException e) {
104 throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e);
105 }
106 success = true;
107 } finally {
108 if (!success) {
109 if (epollFd != null) {
110 try {
111 epollFd.close();
112 } catch (Exception e) {
113 // ignore
114 }
115 }
116 if (eventFd != null) {
117 try {
118 eventFd.close();
119 } catch (Exception e) {
120 // ignore
121 }
122 }
123 if (timerFd != null) {
124 try {
125 timerFd.close();
126 } catch (Exception e) {
127 // ignore
128 }
129 }
130 }
131 }
132 }
133
134 /**
135 * Return a cleared {@link IovArray} that can be used for writes in this {@link EventLoop}.
136 */
137 IovArray cleanArray() {
138 iovArray.clear();
139 return iovArray;
140 }
141
142 @Override
143 protected void wakeup(boolean inEventLoop) {
144 if (!inEventLoop && WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) {
145 // write to the evfd which will then wake-up epoll_wait(...)
146 Native.eventFdWrite(eventFd.intValue(), 1L);
147 }
148 }
149
150 /**
151 * Register the given epoll with this {@link EventLoop}.
152 */
153 void add(AbstractEpollChannel ch) throws IOException {
154 assert inEventLoop();
155 int fd = ch.fd().intValue();
156 Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags);
157 channels.put(fd, ch);
158 }
159
160 /**
161 * The flags of the given epoll was modified so update the registration
162 */
163 void modify(AbstractEpollChannel ch) throws IOException {
164 assert inEventLoop();
165 Native.epollCtlMod(epollFd.intValue(), ch.fd().intValue(), ch.flags);
166 }
167
168 /**
169 * Deregister the given epoll from this {@link EventLoop}.
170 */
171 void remove(AbstractEpollChannel ch) throws IOException {
172 assert inEventLoop();
173
174 if (ch.isOpen()) {
175 int fd = ch.fd().intValue();
176 if (channels.remove(fd) != null) {
177 // Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
178 // removed once the file-descriptor is closed.
179 Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue());
180 }
181 }
182 }
183
184 @Override
185 protected Queue<Runnable> newTaskQueue(int maxPendingTasks) {
186 // This event loop never calls takeTask()
187 return maxPendingTasks == Integer.MAX_VALUE ? PlatformDependent.<Runnable>newMpscQueue()
188 : PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks);
189 }
190
191 @Override
192 public int pendingTasks() {
193 // As we use a MpscQueue we need to ensure pendingTasks() is only executed from within the EventLoop as
194 // otherwise we may see unexpected behavior (as size() is only allowed to be called by a single consumer).
195 // See https://github.com/netty/netty/issues/5297
196 if (inEventLoop()) {
197 return super.pendingTasks();
198 } else {
199 return submit(pendingTasksCallable).syncUninterruptibly().getNow();
200 }
201 }
202 /**
203 * Returns the percentage of the desired amount of time spent for I/O in the event loop.
204 */
205 public int getIoRatio() {
206 return ioRatio;
207 }
208
209 /**
210 * Sets the percentage of the desired amount of time spent for I/O in the event loop. The default value is
211 * {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
212 */
213 public void setIoRatio(int ioRatio) {
214 if (ioRatio <= 0 || ioRatio > 100) {
215 throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)");
216 }
217 this.ioRatio = ioRatio;
218 }
219
220 private int epollWait(boolean oldWakeup) throws IOException {
221 // If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event.
222 // So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended
223 // until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed
224 // in pipeline.
225 if (oldWakeup && hasTasks()) {
226 return epollWaitNow();
227 }
228
229 long totalDelay = delayNanos(System.nanoTime());
230 int delaySeconds = (int) min(totalDelay / 1000000000L, Integer.MAX_VALUE);
231 return Native.epollWait(epollFd, events, timerFd, delaySeconds,
232 (int) min(totalDelay - delaySeconds * 1000000000L, Integer.MAX_VALUE));
233 }
234
235 private int epollWaitNow() throws IOException {
236 return Native.epollWait(epollFd, events, timerFd, 0, 0);
237 }
238
239 @Override
240 protected void run() {
241 for (;;) {
242 try {
243 int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks());
244 switch (strategy) {
245 case SelectStrategy.CONTINUE:
246 continue;
247 case SelectStrategy.SELECT:
248 strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1);
249
250 // 'wakenUp.compareAndSet(false, true)' is always evaluated
251 // before calling 'selector.wakeup()' to reduce the wake-up
252 // overhead. (Selector.wakeup() is an expensive operation.)
253 //
254 // However, there is a race condition in this approach.
255 // The race condition is triggered when 'wakenUp' is set to
256 // true too early.
257 //
258 // 'wakenUp' is set to true too early if:
259 // 1) Selector is waken up between 'wakenUp.set(false)' and
260 // 'selector.select(...)'. (BAD)
261 // 2) Selector is waken up between 'selector.select(...)' and
262 // 'if (wakenUp.get()) { ... }'. (OK)
263 //
264 // In the first case, 'wakenUp' is set to true and the
265 // following 'selector.select(...)' will wake up immediately.
266 // Until 'wakenUp' is set to false again in the next round,
267 // 'wakenUp.compareAndSet(false, true)' will fail, and therefore
268 // any attempt to wake up the Selector will fail, too, causing
269 // the following 'selector.select(...)' call to block
270 // unnecessarily.
271 //
272 // To fix this problem, we wake up the selector again if wakenUp
273 // is true immediately after selector.select(...).
274 // It is inefficient in that it wakes up the selector for both
275 // the first case (BAD - wake-up required) and the second case
276 // (OK - no wake-up required).
277
278 if (wakenUp == 1) {
279 Native.eventFdWrite(eventFd.intValue(), 1L);
280 }
281 // fallthrough
282 default:
283 }
284
285 final int ioRatio = this.ioRatio;
286 if (ioRatio == 100) {
287 try {
288 if (strategy > 0) {
289 processReady(events, strategy);
290 }
291 } finally {
292 // Ensure we always run tasks.
293 runAllTasks();
294 }
295 } else {
296 final long ioStartTime = System.nanoTime();
297
298 try {
299 if (strategy > 0) {
300 processReady(events, strategy);
301 }
302 } finally {
303 // Ensure we always run tasks.
304 final long ioTime = System.nanoTime() - ioStartTime;
305 runAllTasks(ioTime * (100 - ioRatio) / ioRatio);
306 }
307 }
308 if (allowGrowing && strategy == events.length()) {
309 //increase the size of the array as we needed the whole space for the events
310 events.increase();
311 }
312 } catch (Throwable t) {
313 handleLoopException(t);
314 }
315 // Always handle shutdown even if the loop processing threw an exception.
316 try {
317 if (isShuttingDown()) {
318 closeAll();
319 if (confirmShutdown()) {
320 break;
321 }
322 }
323 } catch (Throwable t) {
324 handleLoopException(t);
325 }
326 }
327 }
328
329 private static void handleLoopException(Throwable t) {
330 logger.warn("Unexpected exception in the selector loop.", t);
331
332 // Prevent possible consecutive immediate failures that lead to
333 // excessive CPU consumption.
334 try {
335 Thread.sleep(1000);
336 } catch (InterruptedException e) {
337 // Ignore.
338 }
339 }
340
341 private void closeAll() {
342 try {
343 epollWaitNow();
344 } catch (IOException ignore) {
345 // ignore on close
346 }
347 // Using the intermediate collection to prevent ConcurrentModificationException.
348 // In the `close()` method, the channel is deleted from `channels` map.
349 Collection<AbstractEpollChannel> array = new ArrayList<AbstractEpollChannel>(channels.size());
350
351 for (IntObjectMap.Entry<AbstractEpollChannel> entry: channels.entries()) {
352 array.add(entry.value());
353 }
354
355 for (AbstractEpollChannel ch: array) {
356 ch.unsafe().close(ch.unsafe().voidPromise());
357 }
358 }
359
360 private void processReady(EpollEventArray events, int ready) {
361 for (int i = 0; i < ready; i ++) {
362 final int fd = events.fd(i);
363 if (fd == eventFd.intValue()) {
364 // consume wakeup event.
365 Native.eventFdRead(fd);
366 } else if (fd == timerFd.intValue()) {
367 // consume wakeup event, necessary because the timer is added with ET mode.
368 Native.timerFdRead(fd);
369 } else {
370 final long ev = events.events(i);
371
372 AbstractEpollChannel ch = channels.get(fd);
373 if (ch != null) {
374 // Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100%
375 // sure about it!
376 // Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the
377 // past.
378 AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe();
379
380 // First check for EPOLLOUT as we may need to fail the connect ChannelPromise before try
381 // to read from the file descriptor.
382 // See https://github.com/netty/netty/issues/3785
383 //
384 // It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused.
385 // In either case epollOutReady() will do the correct thing (finish connecting, or fail
386 // the connection).
387 // See https://github.com/netty/netty/issues/3848
388 if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) {
389 // Force flush of data as the epoll is writable again
390 unsafe.epollOutReady();
391 }
392
393 // Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input.
394 // See https://github.com/netty/netty/issues/4317.
395 //
396 // If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will
397 // try to read from the underlying file descriptor and so notify the user about the error.
398 if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) {
399 // The Channel is still open and there is something to read. Do it now.
400 unsafe.epollInReady();
401 }
402
403 // Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case
404 // we may close the channel directly or try to read more data depending on the state of the
405 // Channel and als depending on the AbstractEpollChannel subtype.
406 if ((ev & Native.EPOLLRDHUP) != 0) {
407 unsafe.epollRdHupReady();
408 }
409 } else {
410 // We received an event for an fd which we not use anymore. Remove it from the epoll_event set.
411 try {
412 Native.epollCtlDel(epollFd.intValue(), fd);
413 } catch (IOException ignore) {
414 // This can happen but is nothing we need to worry about as we only try to delete
415 // the fd from the epoll set as we not found it in our mappings. So this call to
416 // epollCtlDel(...) is just to ensure we cleanup stuff and so may fail if it was
417 // deleted before or the file descriptor was closed before.
418 }
419 }
420 }
421 }
422 }
423
424 @Override
425 protected void cleanup() {
426 try {
427 try {
428 epollFd.close();
429 } catch (IOException e) {
430 logger.warn("Failed to close the epoll fd.", e);
431 }
432 try {
433 eventFd.close();
434 } catch (IOException e) {
435 logger.warn("Failed to close the event fd.", e);
436 }
437 try {
438 timerFd.close();
439 } catch (IOException e) {
440 logger.warn("Failed to close the timer fd.", e);
441 }
442 } finally {
443 // release native memory
444 iovArray.release();
445 events.free();
446 }
447 }
448 }