View Javadoc
1   /*
2    * Copyright 2024 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.channel.uring;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.channel.ChannelOption;
20  import io.netty.channel.unix.Buffer;
21  import io.netty.channel.unix.Limits;
22  import io.netty.util.internal.MathUtil;
23  import io.netty.util.internal.PlatformDependent;
24  import io.netty.util.internal.SystemPropertyUtil;
25  import io.netty.util.internal.logging.InternalLogger;
26  import io.netty.util.internal.logging.InternalLoggerFactory;
27  
28  import java.nio.ByteBuffer;
29  
30  public final class IoUring {
31  
32      private static final Throwable UNAVAILABILITY_CAUSE;
33      private static final boolean IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
34      private static final boolean IORING_SPLICE_SUPPORTED;
35      private static final boolean IORING_SEND_ZC_SUPPORTED;
36      private static final boolean IORING_SENDMSG_ZC_SUPPORTED;
37      private static final boolean IORING_ACCEPT_NO_WAIT_SUPPORTED;
38      private static final boolean IORING_ACCEPT_MULTISHOT_SUPPORTED;
39      private static final boolean IORING_RECV_MULTISHOT_SUPPORTED;
40      private static final boolean IORING_RECVSEND_BUNDLE_SUPPORTED;
41      private static final boolean IORING_POLL_ADD_MULTISHOT_SUPPORTED;
42      private static final boolean IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
43      private static final boolean IORING_SETUP_SUBMIT_ALL_SUPPORTED;
44      private static final boolean IORING_SETUP_CQE_MIXED_SUPPORTED;
45      private static final boolean IORING_SETUP_CQ_SIZE_SUPPORTED;
46      private static final boolean IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
47      private static final boolean IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
48      private static final boolean IORING_SETUP_NO_SQARRAY_SUPPORTED;
49      private static final boolean IORING_REGISTER_BUFFER_RING_SUPPORTED;
50      private static final boolean IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
51      private static final boolean IORING_ACCEPT_MULTISHOT_ENABLED;
52      private static final boolean IORING_RECV_MULTISHOT_ENABLED;
53      private static final boolean IORING_RECVSEND_BUNDLE_ENABLED;
54      private static final boolean IORING_POLL_ADD_MULTISHOT_ENABLED;
55      static final int NUM_ELEMENTS_IOVEC;
56      static final int DEFAULT_RING_SIZE;
57      static final int DEFAULT_CQ_SIZE;
58      static final int DEFAULT_PENDING_OPS_INITIAL_CAPACITY;
59      static final int DISABLE_SETUP_CQ_SIZE = -1;
60  
61      private static final InternalLogger logger;
62  
63      static {
64          logger = InternalLoggerFactory.getInstance(IoUring.class);
65          Throwable cause = null;
66          boolean socketNonEmptySupported = false;
67          boolean spliceSupported = false;
68          boolean sendZcSupported = false;
69          boolean sendmsgZcSupported = false;
70          boolean acceptSupportNoWait = false;
71          boolean acceptMultishotSupported = false;
72          boolean recvsendBundleSupported = false;
73          boolean recvMultishotSupported = false;
74          boolean pollAddMultishotSupported = false;
75          boolean registerIowqWorkersSupported = false;
76          boolean submitAllSupported = false;
77          boolean cqeMixedSupported = false;
78          boolean setUpCqSizeSupported = false;
79          boolean singleIssuerSupported = false;
80          boolean deferTaskrunSupported = false;
81          boolean noSqarraySupported = false;
82          boolean registerBufferRingSupported = false;
83          boolean registerBufferRingIncSupported = false;
84          int numElementsIoVec = 10;
85          int pendingOpsInitialCapacity;
86  
87          String kernelVersion = "[unknown]";
88          try {
89              if (SystemPropertyUtil.getBoolean("io.netty.transport.noNative", false)) {
90                  cause = new UnsupportedOperationException(
91                          "Native transport was explicit disabled with -Dio.netty.transport.noNative=true");
92              } else {
93                  kernelVersion = Native.kernelVersion();
94                  Native.checkKernelVersion(kernelVersion);
95                  if (PlatformDependent.javaVersion() >= 9) {
96                      RingBuffer ringBuffer = null;
97                      try {
98                          ringBuffer = Native.createRingBuffer(1, 0);
99                          if ((ringBuffer.features() & Native.IORING_FEAT_SUBMIT_STABLE) == 0) {
100                             // This should only happen on kernels < 5.4 which we don't support anyway.
101                             throw new UnsupportedOperationException("IORING_FEAT_SUBMIT_STABLE not supported!");
102                         }
103                         // IOV_MAX should be 1024 and an IOV is 16 bytes which means that by default we reserve around
104                         // 160kb.
105                         numElementsIoVec = SystemPropertyUtil.getInt(
106                                 "io.netty.iouring.numElementsIoVec", 10 * Limits.IOV_MAX);
107                         Native.IoUringProbe ioUringProbe = Native.ioUringProbe(ringBuffer.fd());
108                         Native.checkAllIOSupported(ioUringProbe);
109                         socketNonEmptySupported = Native.isCqeFSockNonEmptySupported(ioUringProbe);
110                         spliceSupported = Native.isSpliceSupported(ioUringProbe);
111                         recvsendBundleSupported = (ringBuffer.features() & Native.IORING_FEAT_RECVSEND_BUNDLE) != 0;
112                         sendZcSupported = Native.isSendZcSupported(ioUringProbe);
113                         sendmsgZcSupported =  Native.isSendmsgZcSupported(ioUringProbe);
114                         // IORING_FEAT_RECVSEND_BUNDLE was added in the same release.
115                         acceptSupportNoWait = recvsendBundleSupported;
116 
117                         acceptMultishotSupported = Native.isAcceptMultishotSupported(ioUringProbe);
118                         recvMultishotSupported = Native.isRecvMultishotSupported();
119                         pollAddMultishotSupported = Native.isPollAddMultiShotSupported(ioUringProbe);
120                         registerIowqWorkersSupported = Native.isRegisterIoWqWorkerSupported(ringBuffer.fd());
121                         submitAllSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SUBMIT_ALL);
122                         cqeMixedSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_CQE_MIXED);
123                         setUpCqSizeSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_CQSIZE);
124                         singleIssuerSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SINGLE_ISSUER);
125                         // IORING_SETUP_DEFER_TASKRUN requires to also set IORING_SETUP_SINGLE_ISSUER.
126                         // See https://manpages.debian.org/unstable/liburing-dev/io_uring_setup.2.en.html
127                         deferTaskrunSupported = Native.ioUringSetupSupportsFlags(
128                                 Native.IORING_SETUP_SINGLE_ISSUER | Native.IORING_SETUP_DEFER_TASKRUN);
129                         noSqarraySupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_NO_SQARRAY);
130                         registerBufferRingSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(), 0);
131                         registerBufferRingIncSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(),
132                                 Native.IOU_PBUF_RING_INC);
133                     } finally {
134                         if (ringBuffer != null) {
135                             try {
136                                 ringBuffer.close();
137                             } catch (Exception ignore) {
138                                 // ignore
139                             }
140                         }
141                     }
142                 } else {
143                     cause = new UnsupportedOperationException("Java 9+ is required");
144                 }
145             }
146         } catch (Throwable t) {
147             cause = t;
148         }
149         // Assign static finals first so printFeatures() (no-arg) can read them.
150         UNAVAILABILITY_CAUSE = cause;
151         IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED = socketNonEmptySupported;
152         IORING_SPLICE_SUPPORTED = spliceSupported;
153         IORING_SEND_ZC_SUPPORTED = sendZcSupported;
154         IORING_SENDMSG_ZC_SUPPORTED = sendmsgZcSupported;
155         IORING_ACCEPT_NO_WAIT_SUPPORTED = acceptSupportNoWait;
156         IORING_ACCEPT_MULTISHOT_SUPPORTED = acceptMultishotSupported;
157         IORING_RECV_MULTISHOT_SUPPORTED = recvMultishotSupported;
158         IORING_RECVSEND_BUNDLE_SUPPORTED = recvsendBundleSupported;
159         IORING_POLL_ADD_MULTISHOT_SUPPORTED = pollAddMultishotSupported;
160         IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED = registerIowqWorkersSupported;
161         IORING_SETUP_SUBMIT_ALL_SUPPORTED = submitAllSupported;
162         IORING_SETUP_CQE_MIXED_SUPPORTED = cqeMixedSupported;
163         IORING_SETUP_CQ_SIZE_SUPPORTED = setUpCqSizeSupported;
164         IORING_SETUP_SINGLE_ISSUER_SUPPORTED = singleIssuerSupported;
165         IORING_SETUP_DEFER_TASKRUN_SUPPORTED = deferTaskrunSupported;
166         IORING_SETUP_NO_SQARRAY_SUPPORTED = noSqarraySupported;
167         IORING_REGISTER_BUFFER_RING_SUPPORTED = registerBufferRingSupported;
168         IORING_REGISTER_BUFFER_RING_INC_SUPPORTED = registerBufferRingIncSupported;
169 
170         IORING_ACCEPT_MULTISHOT_ENABLED = IORING_ACCEPT_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
171                 "io.netty.iouring.acceptMultiShotEnabled", true);
172         IORING_RECV_MULTISHOT_ENABLED = IORING_RECV_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
173                 "io.netty.iouring.recvMultiShotEnabled", true);
174         // Explicit disable RECVSEND_BUNDLE as there is a know kernel bug that will be fixed in the future:
175         // See https://lore.kernel.org/io-uring/[email protected]/
176         //      T/#ma949ad361d376247a16db73e741cb1043e56e6a4
177         IORING_RECVSEND_BUNDLE_ENABLED = IORING_RECVSEND_BUNDLE_SUPPORTED && SystemPropertyUtil.getBoolean(
178                 "io.netty.iouring.recvsendBundleEnabled", false);
179         IORING_POLL_ADD_MULTISHOT_ENABLED = IORING_POLL_ADD_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
180                "io.netty.iouring.pollAddMultishotEnabled", true);
181         NUM_ELEMENTS_IOVEC = numElementsIoVec;
182 
183         DEFAULT_RING_SIZE =  Math.max(16, SystemPropertyUtil.getInt("io.netty.iouring.ringSize", 128));
184         pendingOpsInitialCapacity = SystemPropertyUtil.getInt(
185                 "io.netty.iouring.pendingOpsInitialCapacity", DEFAULT_RING_SIZE);
186         if (pendingOpsInitialCapacity <= 0) {
187             int configuredCapacity = pendingOpsInitialCapacity;
188             pendingOpsInitialCapacity = MathUtil.safeFindNextPositivePowerOfTwo(DEFAULT_RING_SIZE);
189             logger.warn("Invalid value {} for -Dio.netty.iouring.pendingOpsInitialCapacity; using {} instead.",
190                     configuredCapacity, pendingOpsInitialCapacity);
191         } else if (Integer.bitCount(pendingOpsInitialCapacity) != 1) {
192             int configuredCapacity = pendingOpsInitialCapacity;
193             pendingOpsInitialCapacity = MathUtil.safeFindNextPositivePowerOfTwo(pendingOpsInitialCapacity);
194             logger.warn("Rounding -Dio.netty.iouring.pendingOpsInitialCapacity from {} up to {}.",
195                     configuredCapacity, pendingOpsInitialCapacity);
196         }
197         DEFAULT_PENDING_OPS_INITIAL_CAPACITY = pendingOpsInitialCapacity;
198         if (IORING_SETUP_CQ_SIZE_SUPPORTED) {
199             DEFAULT_CQ_SIZE = Math.max(DEFAULT_RING_SIZE,
200                     SystemPropertyUtil.getInt("io.netty.iouring.cqSize", 4096));
201         } else {
202             DEFAULT_CQ_SIZE = DISABLE_SETUP_CQ_SIZE;
203         }
204         // Now that all static fields are assigned, emit the debug log using the shared printFeatures()
205         if (cause != null) {
206             if (logger.isTraceEnabled()) {
207                 logger.debug("IoUring support is not available using kernel {}", kernelVersion, cause);
208             } else if (logger.isDebugEnabled()) {
209                 logger.debug("IoUring support is not available using kernel {}: {}", kernelVersion, cause.getMessage());
210             }
211         } else {
212             if (logger.isDebugEnabled()) {
213                 logger.debug("IoUring support is available using kernel {}: {}", kernelVersion, supportedFeatures());
214             }
215         }
216     }
217 
218     public static boolean isAvailable() {
219         return UNAVAILABILITY_CAUSE == null;
220     }
221 
222     /**
223      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
224      * {@linkplain ChannelOption#TCP_FASTOPEN_CONNECT client-side TCP FastOpen}.
225      *
226      * @return {@code true} if it's possible to use client-side TCP FastOpen via io_uring, otherwise {@code false}.
227      */
228     public static boolean isTcpFastOpenClientSideAvailable() {
229         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_CLIENT;
230     }
231 
232     /**
233      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
234      * {@linkplain ChannelOption#TCP_FASTOPEN server-side TCP FastOpen}.
235      *
236      * @return {@code true} if it's possible to use server-side TCP FastOpen via io_uring, otherwise {@code false}.
237      */
238     public static boolean isTcpFastOpenServerSideAvailable() {
239         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_SERVER;
240     }
241 
242     static boolean isCqeFSockNonEmptySupported() {
243         return IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
244     }
245 
246     /**
247      * Returns if SPLICE is supported or not.
248      *
249      * @return {@code true} if supported, {@code false} otherwise.
250      */
251     public static boolean isSpliceSupported() {
252         return IORING_SPLICE_SUPPORTED;
253     }
254 
255     /**
256      * Returns if {@code IORING_OP_SEND_ZC} is supported.
257      *
258      * @return {@code true} if {@code IORING_OP_SEND_ZC} is supported, {@code false} otherwise.
259      */
260     static boolean isSendZcSupported() {
261         return IORING_SEND_ZC_SUPPORTED;
262     }
263 
264     /**
265      * Returns if {@code IORING_OP_SENDMSG_ZC} is supported.
266      *
267      * @return {@code true} if {@code IORING_OP_SENDMSG_ZC} is supported, {@code false} otherwise.
268      */
269     static boolean isSendmsgZcSupported() {
270         return IORING_SENDMSG_ZC_SUPPORTED;
271     }
272 
273     static boolean isAcceptNoWaitSupported() {
274         return IORING_ACCEPT_NO_WAIT_SUPPORTED;
275     }
276 
277     static boolean isAcceptMultishotSupported() {
278         return IORING_ACCEPT_MULTISHOT_SUPPORTED;
279     }
280 
281     static boolean isRecvMultishotSupported() {
282         return IORING_RECV_MULTISHOT_SUPPORTED;
283     }
284 
285     static boolean isRecvsendBundleSupported() {
286         return IORING_RECVSEND_BUNDLE_SUPPORTED;
287     }
288 
289     static boolean isPollAddMultishotSupported() {
290         return IORING_POLL_ADD_MULTISHOT_SUPPORTED;
291     }
292 
293     static boolean isRegisterIowqMaxWorkersSupported() {
294         return IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
295     }
296 
297     static boolean isSetupCqeSizeSupported() {
298         return IORING_SETUP_CQ_SIZE_SUPPORTED;
299     }
300 
301     static boolean isSetupSubmitAllSupported() {
302         return IORING_SETUP_SUBMIT_ALL_SUPPORTED;
303     }
304 
305     static boolean isSetupCqeMixedSupported() {
306         return IORING_SETUP_CQE_MIXED_SUPPORTED;
307     }
308 
309     static boolean isSetupSingleIssuerSupported() {
310         return IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
311     }
312 
313     static boolean isSetupDeferTaskrunSupported() {
314         return IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
315     }
316 
317     static boolean isIoringSetupNoSqarraySupported() {
318         return IORING_SETUP_NO_SQARRAY_SUPPORTED;
319     }
320     /**
321      * Returns if it is supported to use a buffer ring.
322      *
323      * @return {@code true} if supported, {@code false} otherwise.
324      */
325     public static boolean isRegisterBufferRingSupported() {
326         return IORING_REGISTER_BUFFER_RING_SUPPORTED;
327     }
328 
329     /**
330      * Returns if it is supported to use an incremental buffer ring.
331      *
332      * @return {@code true} if supported, {@code false} otherwise.
333      */
334     public static boolean isRegisterBufferRingIncSupported() {
335         return IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
336     }
337 
338     /**
339      * Returns if multi-shot ACCEPT is used or not.
340      *
341      * @return {@code true} if enabled, {@code false} otherwise.
342      */
343     public static boolean isAcceptMultishotEnabled() {
344         return IORING_ACCEPT_MULTISHOT_ENABLED;
345     }
346 
347     /**
348      * Returns if multi-shot RECV is used or not.
349      *
350      * @return {@code true} if enabled, {@code false} otherwise.
351      */
352     public static boolean isRecvMultishotEnabled() {
353         return IORING_RECV_MULTISHOT_ENABLED;
354     }
355 
356     /**
357      * Returns if RECVSEND bundles are used or not.
358      *
359      * @return {@code true} if enabled, {@code false} otherwise.
360      */
361     public static boolean isRecvsendBundleEnabled() {
362         return IORING_RECVSEND_BUNDLE_ENABLED;
363     }
364 
365     /**
366      * Returns if multi-shot POLL_ADD is used or not.
367      *
368      * @return {@code true} if enabled, {@code false} otherwise.
369      */
370     public static boolean isPollAddMultishotEnabled() {
371         return IORING_POLL_ADD_MULTISHOT_ENABLED;
372     }
373 
374     public static void ensureAvailability() {
375         if (UNAVAILABILITY_CAUSE != null) {
376             throw (Error) new UnsatisfiedLinkError(
377                     "failed to load the required native library").initCause(UNAVAILABILITY_CAUSE);
378         }
379     }
380 
381     static long memoryAddress(ByteBuf buffer) {
382         if (buffer.hasMemoryAddress()) {
383             return buffer.memoryAddress();
384         }
385         // Use internalNioBuffer to reduce object creation.
386         // It is important to add the position as the returned ByteBuffer might be shared by multiple ByteBuf
387         // instances and so has an address that starts before the start of the ByteBuf itself.
388         ByteBuffer byteBuffer = buffer.internalNioBuffer(0, buffer.capacity());
389         return Buffer.memoryAddress(byteBuffer) + byteBuffer.position();
390     }
391 
392     public static Throwable unavailabilityCause() {
393         return UNAVAILABILITY_CAUSE;
394     }
395 
396     private static String supportedFeatures() {
397         if (!isAvailable()) {
398             return "";
399         }
400         return "CQE_F_SOCK_NONEMPTY_SUPPORTED=" + IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED
401                 + ", SPLICE_SUPPORTED=" + IORING_SPLICE_SUPPORTED
402                 + ", ACCEPT_NO_WAIT_SUPPORTED=" + IORING_ACCEPT_NO_WAIT_SUPPORTED
403                 + ", ACCEPT_MULTISHOT_SUPPORTED=" + IORING_ACCEPT_MULTISHOT_SUPPORTED
404                 + ", POLL_ADD_MULTISHOT_SUPPORTED=" + IORING_POLL_ADD_MULTISHOT_SUPPORTED
405                 + ", RECV_MULTISHOT_SUPPORTED=" + IORING_RECV_MULTISHOT_SUPPORTED
406                 + ", IORING_RECVSEND_BUNDLE_SUPPORTED=" + IORING_RECVSEND_BUNDLE_SUPPORTED
407                 + ", REGISTER_IOWQ_MAX_WORKERS_SUPPORTED=" + IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED
408                 + ", SETUP_SUBMIT_ALL_SUPPORTED=" + IORING_SETUP_SUBMIT_ALL_SUPPORTED
409                 + ", SETUP_CQE_MIXED_SUPPORTED=" + IORING_SETUP_CQE_MIXED_SUPPORTED
410                 + ", SETUP_CQ_SIZE_SUPPORTED=" + IORING_SETUP_CQ_SIZE_SUPPORTED
411                 + ", SETUP_SINGLE_ISSUER_SUPPORTED=" + IORING_SETUP_SINGLE_ISSUER_SUPPORTED
412                 + ", SETUP_DEFER_TASKRUN_SUPPORTED=" + IORING_SETUP_DEFER_TASKRUN_SUPPORTED
413                 + ", SETUP_NO_SQARRAY_SUPPORTED=" + IORING_SETUP_NO_SQARRAY_SUPPORTED
414                 + ", REGISTER_BUFFER_RING_SUPPORTED=" + IORING_REGISTER_BUFFER_RING_SUPPORTED
415                 + ", REGISTER_BUFFER_RING_INC_SUPPORTED=" + IORING_REGISTER_BUFFER_RING_INC_SUPPORTED
416                 + ", SEND_ZC_SUPPORTED=" + IORING_SEND_ZC_SUPPORTED
417                 + ", SENDMSG_ZC_SUPPORTED=" + IORING_SENDMSG_ZC_SUPPORTED;
418     }
419 
420     /**
421      * Returns a string representation of the io_uring support and feature set. This mirrors the
422      * debug logging output that reports each individual feature's availability.
423      */
424     public static String featureString() {
425         if (!isAvailable()) {
426             Throwable t = unavailabilityCause();
427             return "IoUring unavailable: " + (t == null ? "unknown cause" : t.toString());
428         }
429         return "IoUring features: " + supportedFeatures();
430     }
431 
432     @Override
433     public String toString() {
434         return featureString();
435     }
436 
437     private IoUring() {
438     }
439 }