View Javadoc
1   /*
2    * Copyright 2024 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.channel.uring;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.channel.ChannelOption;
20  import io.netty.channel.unix.Buffer;
21  import io.netty.channel.unix.Limits;
22  import io.netty.util.internal.PlatformDependent;
23  import io.netty.util.internal.SystemPropertyUtil;
24  import io.netty.util.internal.logging.InternalLogger;
25  import io.netty.util.internal.logging.InternalLoggerFactory;
26  
27  import java.nio.ByteBuffer;
28  
29  public final class IoUring {
30  
31      private static final Throwable UNAVAILABILITY_CAUSE;
32      private static final boolean IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
33      private static final boolean IORING_SPLICE_SUPPORTED;
34      private static final boolean IORING_SEND_ZC_SUPPORTED;
35      private static final boolean IORING_SENDMSG_ZC_SUPPORTED;
36      private static final boolean IORING_ACCEPT_NO_WAIT_SUPPORTED;
37      private static final boolean IORING_ACCEPT_MULTISHOT_SUPPORTED;
38      private static final boolean IORING_RECV_MULTISHOT_SUPPORTED;
39      private static final boolean IORING_RECVSEND_BUNDLE_SUPPORTED;
40      private static final boolean IORING_POLL_ADD_MULTISHOT_SUPPORTED;
41      private static final boolean IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
42      private static final boolean IORING_SETUP_SUBMIT_ALL_SUPPORTED;
43      private static final boolean IORING_SETUP_CQE_MIXED_SUPPORTED;
44      private static final boolean IORING_SETUP_CQ_SIZE_SUPPORTED;
45      private static final boolean IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
46      private static final boolean IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
47      private static final boolean IORING_SETUP_NO_SQARRAY_SUPPORTED;
48      private static final boolean IORING_REGISTER_BUFFER_RING_SUPPORTED;
49      private static final boolean IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
50      private static final boolean IORING_ACCEPT_MULTISHOT_ENABLED;
51      private static final boolean IORING_RECV_MULTISHOT_ENABLED;
52      private static final boolean IORING_RECVSEND_BUNDLE_ENABLED;
53      private static final boolean IORING_POLL_ADD_MULTISHOT_ENABLED;
54      static final int NUM_ELEMENTS_IOVEC;
55      static final int DEFAULT_RING_SIZE;
56      static final int DEFAULT_CQ_SIZE;
57      static final int DISABLE_SETUP_CQ_SIZE = -1;
58  
59      private static final InternalLogger logger;
60  
61      static {
62          logger = InternalLoggerFactory.getInstance(IoUring.class);
63          Throwable cause = null;
64          boolean socketNonEmptySupported = false;
65          boolean spliceSupported = false;
66          boolean sendZcSupported = false;
67          boolean sendmsgZcSupported = false;
68          boolean acceptSupportNoWait = false;
69          boolean acceptMultishotSupported = false;
70          boolean recvsendBundleSupported = false;
71          boolean recvMultishotSupported = false;
72          boolean pollAddMultishotSupported = false;
73          boolean registerIowqWorkersSupported = false;
74          boolean submitAllSupported = false;
75          boolean cqeMixedSupported = false;
76          boolean setUpCqSizeSupported = false;
77          boolean singleIssuerSupported = false;
78          boolean deferTaskrunSupported = false;
79          boolean noSqarraySupported = false;
80          boolean registerBufferRingSupported = false;
81          boolean registerBufferRingIncSupported = false;
82          int numElementsIoVec = 10;
83  
84          String kernelVersion = "[unknown]";
85          try {
86              if (SystemPropertyUtil.getBoolean("io.netty.transport.noNative", false)) {
87                  cause = new UnsupportedOperationException(
88                          "Native transport was explicit disabled with -Dio.netty.transport.noNative=true");
89              } else {
90                  kernelVersion = Native.kernelVersion();
91                  Native.checkKernelVersion(kernelVersion);
92                  if (PlatformDependent.javaVersion() >= 9) {
93                      RingBuffer ringBuffer = null;
94                      try {
95                          ringBuffer = Native.createRingBuffer(1, 0);
96                          if ((ringBuffer.features() & Native.IORING_FEAT_SUBMIT_STABLE) == 0) {
97                              // This should only happen on kernels < 5.4 which we don't support anyway.
98                              throw new UnsupportedOperationException("IORING_FEAT_SUBMIT_STABLE not supported!");
99                          }
100                         // IOV_MAX should be 1024 and an IOV is 16 bytes which means that by default we reserve around
101                         // 160kb.
102                         numElementsIoVec = SystemPropertyUtil.getInt(
103                                 "io.netty.iouring.numElementsIoVec", 10 * Limits.IOV_MAX);
104                         Native.IoUringProbe ioUringProbe = Native.ioUringProbe(ringBuffer.fd());
105                         Native.checkAllIOSupported(ioUringProbe);
106                         socketNonEmptySupported = Native.isCqeFSockNonEmptySupported(ioUringProbe);
107                         spliceSupported = Native.isSpliceSupported(ioUringProbe);
108                         recvsendBundleSupported = (ringBuffer.features() & Native.IORING_FEAT_RECVSEND_BUNDLE) != 0;
109                         sendZcSupported = Native.isSendZcSupported(ioUringProbe);
110                         sendmsgZcSupported =  Native.isSendmsgZcSupported(ioUringProbe);
111                         // IORING_FEAT_RECVSEND_BUNDLE was added in the same release.
112                         acceptSupportNoWait = recvsendBundleSupported;
113 
114                         acceptMultishotSupported = Native.isAcceptMultishotSupported(ioUringProbe);
115                         recvMultishotSupported = Native.isRecvMultishotSupported();
116                         pollAddMultishotSupported = Native.isPollAddMultiShotSupported(ioUringProbe);
117                         registerIowqWorkersSupported = Native.isRegisterIoWqWorkerSupported(ringBuffer.fd());
118                         submitAllSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SUBMIT_ALL);
119                         cqeMixedSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_CQE_MIXED);
120                         setUpCqSizeSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_CQSIZE);
121                         singleIssuerSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SINGLE_ISSUER);
122                         // IORING_SETUP_DEFER_TASKRUN requires to also set IORING_SETUP_SINGLE_ISSUER.
123                         // See https://manpages.debian.org/unstable/liburing-dev/io_uring_setup.2.en.html
124                         deferTaskrunSupported = Native.ioUringSetupSupportsFlags(
125                                 Native.IORING_SETUP_SINGLE_ISSUER | Native.IORING_SETUP_DEFER_TASKRUN);
126                         noSqarraySupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_NO_SQARRAY);
127                         registerBufferRingSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(), 0);
128                         registerBufferRingIncSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(),
129                                 Native.IOU_PBUF_RING_INC);
130                     } finally {
131                         if (ringBuffer != null) {
132                             try {
133                                 ringBuffer.close();
134                             } catch (Exception ignore) {
135                                 // ignore
136                             }
137                         }
138                     }
139                 } else {
140                     cause = new UnsupportedOperationException("Java 9+ is required");
141                 }
142             }
143         } catch (Throwable t) {
144             cause = t;
145         }
146         // Assign static finals first so printFeatures() (no-arg) can read them.
147         UNAVAILABILITY_CAUSE = cause;
148         IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED = socketNonEmptySupported;
149         IORING_SPLICE_SUPPORTED = spliceSupported;
150         IORING_SEND_ZC_SUPPORTED = sendZcSupported;
151         IORING_SENDMSG_ZC_SUPPORTED = sendmsgZcSupported;
152         IORING_ACCEPT_NO_WAIT_SUPPORTED = acceptSupportNoWait;
153         IORING_ACCEPT_MULTISHOT_SUPPORTED = acceptMultishotSupported;
154         IORING_RECV_MULTISHOT_SUPPORTED = recvMultishotSupported;
155         IORING_RECVSEND_BUNDLE_SUPPORTED = recvsendBundleSupported;
156         IORING_POLL_ADD_MULTISHOT_SUPPORTED = pollAddMultishotSupported;
157         IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED = registerIowqWorkersSupported;
158         IORING_SETUP_SUBMIT_ALL_SUPPORTED = submitAllSupported;
159         IORING_SETUP_CQE_MIXED_SUPPORTED = cqeMixedSupported;
160         IORING_SETUP_CQ_SIZE_SUPPORTED = setUpCqSizeSupported;
161         IORING_SETUP_SINGLE_ISSUER_SUPPORTED = singleIssuerSupported;
162         IORING_SETUP_DEFER_TASKRUN_SUPPORTED = deferTaskrunSupported;
163         IORING_SETUP_NO_SQARRAY_SUPPORTED = noSqarraySupported;
164         IORING_REGISTER_BUFFER_RING_SUPPORTED = registerBufferRingSupported;
165         IORING_REGISTER_BUFFER_RING_INC_SUPPORTED = registerBufferRingIncSupported;
166 
167         IORING_ACCEPT_MULTISHOT_ENABLED = IORING_ACCEPT_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
168                 "io.netty.iouring.acceptMultiShotEnabled", true);
169         IORING_RECV_MULTISHOT_ENABLED = IORING_RECV_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
170                 "io.netty.iouring.recvMultiShotEnabled", true);
171         // Explicit disable RECVSEND_BUNDLE as there is a know kernel bug that will be fixed in the future:
172         // See https://lore.kernel.org/io-uring/[email protected]/
173         //      T/#ma949ad361d376247a16db73e741cb1043e56e6a4
174         IORING_RECVSEND_BUNDLE_ENABLED = IORING_RECVSEND_BUNDLE_SUPPORTED && SystemPropertyUtil.getBoolean(
175                 "io.netty.iouring.recvsendBundleEnabled", false);
176         IORING_POLL_ADD_MULTISHOT_ENABLED = IORING_POLL_ADD_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
177                "io.netty.iouring.pollAddMultishotEnabled", true);
178         NUM_ELEMENTS_IOVEC = numElementsIoVec;
179 
180         DEFAULT_RING_SIZE =  Math.max(16, SystemPropertyUtil.getInt("io.netty.iouring.ringSize", 128));
181 
182         if (IORING_SETUP_CQ_SIZE_SUPPORTED) {
183             DEFAULT_CQ_SIZE = Math.max(DEFAULT_RING_SIZE,
184                     SystemPropertyUtil.getInt("io.netty.iouring.cqSize", 4096));
185         } else {
186             DEFAULT_CQ_SIZE = DISABLE_SETUP_CQ_SIZE;
187         }
188         // Now that all static fields are assigned, emit the debug log using the shared printFeatures()
189         if (cause != null) {
190             if (logger.isTraceEnabled()) {
191                 logger.debug("IoUring support is not available using kernel {}", kernelVersion, cause);
192             } else if (logger.isDebugEnabled()) {
193                 logger.debug("IoUring support is not available using kernel {}: {}", kernelVersion, cause.getMessage());
194             }
195         } else {
196             if (logger.isDebugEnabled()) {
197                 logger.debug("IoUring support is available using kernel {}: {}", kernelVersion, supportedFeatures());
198             }
199         }
200     }
201 
202     public static boolean isAvailable() {
203         return UNAVAILABILITY_CAUSE == null;
204     }
205 
206     /**
207      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
208      * {@linkplain ChannelOption#TCP_FASTOPEN_CONNECT client-side TCP FastOpen}.
209      *
210      * @return {@code true} if it's possible to use client-side TCP FastOpen via io_uring, otherwise {@code false}.
211      */
212     public static boolean isTcpFastOpenClientSideAvailable() {
213         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_CLIENT;
214     }
215 
216     /**
217      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
218      * {@linkplain ChannelOption#TCP_FASTOPEN server-side TCP FastOpen}.
219      *
220      * @return {@code true} if it's possible to use server-side TCP FastOpen via io_uring, otherwise {@code false}.
221      */
222     public static boolean isTcpFastOpenServerSideAvailable() {
223         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_SERVER;
224     }
225 
226     static boolean isCqeFSockNonEmptySupported() {
227         return IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
228     }
229 
230     /**
231      * Returns if SPLICE is supported or not.
232      *
233      * @return {@code true} if supported, {@code false} otherwise.
234      */
235     public static boolean isSpliceSupported() {
236         return IORING_SPLICE_SUPPORTED;
237     }
238 
239     /**
240      * Returns if {@code IORING_OP_SEND_ZC} is supported.
241      *
242      * @return {@code true} if {@code IORING_OP_SEND_ZC} is supported, {@code false} otherwise.
243      */
244     static boolean isSendZcSupported() {
245         return IORING_SEND_ZC_SUPPORTED;
246     }
247 
248     /**
249      * Returns if {@code IORING_OP_SENDMSG_ZC} is supported.
250      *
251      * @return {@code true} if {@code IORING_OP_SENDMSG_ZC} is supported, {@code false} otherwise.
252      */
253     static boolean isSendmsgZcSupported() {
254         return IORING_SENDMSG_ZC_SUPPORTED;
255     }
256 
257     static boolean isAcceptNoWaitSupported() {
258         return IORING_ACCEPT_NO_WAIT_SUPPORTED;
259     }
260 
261     static boolean isAcceptMultishotSupported() {
262         return IORING_ACCEPT_MULTISHOT_SUPPORTED;
263     }
264 
265     static boolean isRecvMultishotSupported() {
266         return IORING_RECV_MULTISHOT_SUPPORTED;
267     }
268 
269     static boolean isRecvsendBundleSupported() {
270         return IORING_RECVSEND_BUNDLE_SUPPORTED;
271     }
272 
273     static boolean isPollAddMultishotSupported() {
274         return IORING_POLL_ADD_MULTISHOT_SUPPORTED;
275     }
276 
277     static boolean isRegisterIowqMaxWorkersSupported() {
278         return IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
279     }
280 
281     static boolean isSetupCqeSizeSupported() {
282         return IORING_SETUP_CQ_SIZE_SUPPORTED;
283     }
284 
285     static boolean isSetupSubmitAllSupported() {
286         return IORING_SETUP_SUBMIT_ALL_SUPPORTED;
287     }
288 
289     static boolean isSetupCqeMixedSupported() {
290         return IORING_SETUP_CQE_MIXED_SUPPORTED;
291     }
292 
293     static boolean isSetupSingleIssuerSupported() {
294         return IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
295     }
296 
297     static boolean isSetupDeferTaskrunSupported() {
298         return IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
299     }
300 
301     static boolean isIoringSetupNoSqarraySupported() {
302         return IORING_SETUP_NO_SQARRAY_SUPPORTED;
303     }
304     /**
305      * Returns if it is supported to use a buffer ring.
306      *
307      * @return {@code true} if supported, {@code false} otherwise.
308      */
309     public static boolean isRegisterBufferRingSupported() {
310         return IORING_REGISTER_BUFFER_RING_SUPPORTED;
311     }
312 
313     /**
314      * Returns if it is supported to use an incremental buffer ring.
315      *
316      * @return {@code true} if supported, {@code false} otherwise.
317      */
318     public static boolean isRegisterBufferRingIncSupported() {
319         return IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
320     }
321 
322     /**
323      * Returns if multi-shot ACCEPT is used or not.
324      *
325      * @return {@code true} if enabled, {@code false} otherwise.
326      */
327     public static boolean isAcceptMultishotEnabled() {
328         return IORING_ACCEPT_MULTISHOT_ENABLED;
329     }
330 
331     /**
332      * Returns if multi-shot RECV is used or not.
333      *
334      * @return {@code true} if enabled, {@code false} otherwise.
335      */
336     public static boolean isRecvMultishotEnabled() {
337         return IORING_RECV_MULTISHOT_ENABLED;
338     }
339 
340     /**
341      * Returns if RECVSEND bundles are used or not.
342      *
343      * @return {@code true} if enabled, {@code false} otherwise.
344      */
345     public static boolean isRecvsendBundleEnabled() {
346         return IORING_RECVSEND_BUNDLE_ENABLED;
347     }
348 
349     /**
350      * Returns if multi-shot POLL_ADD is used or not.
351      *
352      * @return {@code true} if enabled, {@code false} otherwise.
353      */
354     public static boolean isPollAddMultishotEnabled() {
355         return IORING_POLL_ADD_MULTISHOT_ENABLED;
356     }
357 
358     public static void ensureAvailability() {
359         if (UNAVAILABILITY_CAUSE != null) {
360             throw (Error) new UnsatisfiedLinkError(
361                     "failed to load the required native library").initCause(UNAVAILABILITY_CAUSE);
362         }
363     }
364 
365     static long memoryAddress(ByteBuf buffer) {
366         if (buffer.hasMemoryAddress()) {
367             return buffer.memoryAddress();
368         }
369         // Use internalNioBuffer to reduce object creation.
370         // It is important to add the position as the returned ByteBuffer might be shared by multiple ByteBuf
371         // instances and so has an address that starts before the start of the ByteBuf itself.
372         ByteBuffer byteBuffer = buffer.internalNioBuffer(0, buffer.capacity());
373         return Buffer.memoryAddress(byteBuffer) + byteBuffer.position();
374     }
375 
376     public static Throwable unavailabilityCause() {
377         return UNAVAILABILITY_CAUSE;
378     }
379 
380     private static String supportedFeatures() {
381         if (!isAvailable()) {
382             return "";
383         }
384         return "CQE_F_SOCK_NONEMPTY_SUPPORTED=" + IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED
385                 + ", SPLICE_SUPPORTED=" + IORING_SPLICE_SUPPORTED
386                 + ", ACCEPT_NO_WAIT_SUPPORTED=" + IORING_ACCEPT_NO_WAIT_SUPPORTED
387                 + ", ACCEPT_MULTISHOT_SUPPORTED=" + IORING_ACCEPT_MULTISHOT_SUPPORTED
388                 + ", POLL_ADD_MULTISHOT_SUPPORTED=" + IORING_POLL_ADD_MULTISHOT_SUPPORTED
389                 + ", RECV_MULTISHOT_SUPPORTED=" + IORING_RECV_MULTISHOT_SUPPORTED
390                 + ", IORING_RECVSEND_BUNDLE_SUPPORTED=" + IORING_RECVSEND_BUNDLE_SUPPORTED
391                 + ", REGISTER_IOWQ_MAX_WORKERS_SUPPORTED=" + IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED
392                 + ", SETUP_SUBMIT_ALL_SUPPORTED=" + IORING_SETUP_SUBMIT_ALL_SUPPORTED
393                 + ", SETUP_CQE_MIXED_SUPPORTED=" + IORING_SETUP_CQE_MIXED_SUPPORTED
394                 + ", SETUP_CQ_SIZE_SUPPORTED=" + IORING_SETUP_CQ_SIZE_SUPPORTED
395                 + ", SETUP_SINGLE_ISSUER_SUPPORTED=" + IORING_SETUP_SINGLE_ISSUER_SUPPORTED
396                 + ", SETUP_DEFER_TASKRUN_SUPPORTED=" + IORING_SETUP_DEFER_TASKRUN_SUPPORTED
397                 + ", SETUP_NO_SQARRAY_SUPPORTED=" + IORING_SETUP_NO_SQARRAY_SUPPORTED
398                 + ", REGISTER_BUFFER_RING_SUPPORTED=" + IORING_REGISTER_BUFFER_RING_SUPPORTED
399                 + ", REGISTER_BUFFER_RING_INC_SUPPORTED=" + IORING_REGISTER_BUFFER_RING_INC_SUPPORTED
400                 + ", SEND_ZC_SUPPORTED=" + IORING_SEND_ZC_SUPPORTED
401                 + ", SENDMSG_ZC_SUPPORTED=" + IORING_SENDMSG_ZC_SUPPORTED;
402     }
403 
404     /**
405      * Returns a string representation of the io_uring support and feature set. This mirrors the
406      * debug logging output that reports each individual feature's availability.
407      */
408     public static String featureString() {
409         if (!isAvailable()) {
410             Throwable t = unavailabilityCause();
411             return "IoUring unavailable: " + (t == null ? "unknown cause" : t.toString());
412         }
413         return "IoUring features: " + supportedFeatures();
414     }
415 
416     @Override
417     public String toString() {
418         return featureString();
419     }
420 
421     private IoUring() {
422     }
423 }