View Javadoc
1   /*
2    * Copyright 2024 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.channel.uring;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.channel.ChannelOption;
20  import io.netty.channel.unix.Buffer;
21  import io.netty.channel.unix.Limits;
22  import io.netty.util.internal.PlatformDependent;
23  import io.netty.util.internal.SystemPropertyUtil;
24  import io.netty.util.internal.logging.InternalLogger;
25  import io.netty.util.internal.logging.InternalLoggerFactory;
26  
27  import java.nio.ByteBuffer;
28  
29  public final class IoUring {
30  
31      private static final Throwable UNAVAILABILITY_CAUSE;
32      private static final boolean IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
33      private static final boolean IORING_SPLICE_SUPPORTED;
34      private static final boolean IORING_ACCEPT_NO_WAIT_SUPPORTED;
35      private static final boolean IORING_ACCEPT_MULTISHOT_SUPPORTED;
36      private static final boolean IORING_RECV_MULTISHOT_SUPPORTED;
37      private static final boolean IORING_RECVSEND_BUNDLE_SUPPORTED;
38      private static final boolean IORING_POLL_ADD_MULTISHOT_SUPPORTED;
39      private static final boolean IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
40      private static final boolean IORING_SETUP_SUBMIT_ALL_SUPPORTED;
41      private static final boolean IORING_SETUP_CQ_SIZE_SUPPORTED;
42      private static final boolean IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
43      private static final boolean IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
44      private static final boolean IORING_REGISTER_BUFFER_RING_SUPPORTED;
45      private static final boolean IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
46      private static final boolean IORING_ACCEPT_MULTISHOT_ENABLED;
47      private static final boolean IORING_RECV_MULTISHOT_ENABLED;
48      private static final boolean IORING_RECVSEND_BUNDLE_ENABLED;
49      private static final boolean IORING_POLL_ADD_MULTISHOT_ENABLED;
50      static final int NUM_ELEMENTS_IOVEC;
51  
52      private static final InternalLogger logger;
53  
54      static {
55          logger = InternalLoggerFactory.getInstance(IoUring.class);
56          Throwable cause = null;
57          boolean socketNonEmptySupported = false;
58          boolean spliceSupported = false;
59          boolean acceptSupportNoWait = false;
60          boolean acceptMultishotSupported = false;
61          boolean recvsendBundleSupported = false;
62          boolean recvMultishotSupported = false;
63          boolean pollAddMultishotSupported = false;
64          boolean registerIowqWorkersSupported = false;
65          boolean submitAllSupported = false;
66          boolean setUpCqSizeSupported = false;
67          boolean singleIssuerSupported = false;
68          boolean deferTaskrunSupported = false;
69          boolean registerBufferRingSupported = false;
70          boolean registerBufferRingIncSupported = false;
71          int numElementsIoVec = 10;
72  
73          String kernelVersion = "[unknown]";
74          try {
75              if (SystemPropertyUtil.getBoolean("io.netty.transport.noNative", false)) {
76                  cause = new UnsupportedOperationException(
77                          "Native transport was explicit disabled with -Dio.netty.transport.noNative=true");
78              } else {
79                  kernelVersion = Native.kernelVersion();
80                  Native.checkKernelVersion(kernelVersion);
81                  if (PlatformDependent.javaVersion() >= 9) {
82                      RingBuffer ringBuffer = null;
83                      try {
84                          ringBuffer = Native.createRingBuffer(1, 0);
85                          if ((ringBuffer.features() & Native.IORING_FEAT_SUBMIT_STABLE) == 0) {
86                              // This should only happen on kernels < 5.4 which we don't support anyway.
87                              throw new UnsupportedOperationException("IORING_FEAT_SUBMIT_STABLE not supported!");
88                          }
89                          // IOV_MAX should be 1024 and an IOV is 16 bytes which means that by default we reserve around
90                          // 160kb.
91                          numElementsIoVec = SystemPropertyUtil.getInt(
92                                  "io.netty.iouring.numElementsIoVec", 10 *  Limits.IOV_MAX);
93                          Native.checkAllIOSupported(ringBuffer.fd());
94                          socketNonEmptySupported = Native.isCqeFSockNonEmptySupported(ringBuffer.fd());
95                          spliceSupported = Native.isSpliceSupported(ringBuffer.fd());
96                          recvsendBundleSupported = (ringBuffer.features() & Native.IORING_FEAT_RECVSEND_BUNDLE) != 0;
97                          // IORING_FEAT_RECVSEND_BUNDLE was added in the same release.
98                          acceptSupportNoWait = recvsendBundleSupported;
99  
100                         acceptMultishotSupported = Native.isAcceptMultishotSupported(ringBuffer.fd());
101                         recvMultishotSupported = Native.isRecvMultishotSupported();
102                         pollAddMultishotSupported = Native.isPollAddMultiShotSupported(ringBuffer.fd());
103                         registerIowqWorkersSupported = Native.isRegisterIoWqWorkerSupported(ringBuffer.fd());
104                         submitAllSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SUBMIT_ALL);
105                         setUpCqSizeSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_CQSIZE);
106                         singleIssuerSupported = Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SINGLE_ISSUER);
107                         // IORING_SETUP_DEFER_TASKRUN requires to also set IORING_SETUP_SINGLE_ISSUER.
108                         // See https://manpages.debian.org/unstable/liburing-dev/io_uring_setup.2.en.html
109                         deferTaskrunSupported = Native.ioUringSetupSupportsFlags(
110                                 Native.IORING_SETUP_SINGLE_ISSUER | Native.IORING_SETUP_DEFER_TASKRUN);
111                         registerBufferRingSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(), 0);
112                         registerBufferRingIncSupported = Native.isRegisterBufferRingSupported(ringBuffer.fd(),
113                                 Native.IOU_PBUF_RING_INC);
114                     } finally {
115                         if (ringBuffer != null) {
116                             try {
117                                 ringBuffer.close();
118                             } catch (Exception ignore) {
119                                 // ignore
120                             }
121                         }
122                     }
123                 } else {
124                     cause = new UnsupportedOperationException("Java 9+ is required");
125                 }
126             }
127         } catch (Throwable t) {
128             cause = t;
129         }
130         if (cause != null) {
131             if (logger.isTraceEnabled()) {
132                 logger.debug("IoUring support is not available using kernel {}", kernelVersion, cause);
133             } else if (logger.isDebugEnabled()) {
134                 logger.debug("IoUring support is not available using kernel {}: {}", kernelVersion, cause.getMessage());
135             }
136         } else {
137             if (logger.isDebugEnabled()) {
138                 logger.debug("IoUring support is available using kernel {} (" +
139                         "CQE_F_SOCK_NONEMPTY_SUPPORTED={}, " +
140                         "SPLICE_SUPPORTED={}, " +
141                         "ACCEPT_NO_WAIT_SUPPORTED={}, " +
142                         "ACCEPT_MULTISHOT_SUPPORTED={}, " +
143                         "POLL_ADD_MULTISHOT_SUPPORTED={} " +
144                         "RECV_MULTISHOT_SUPPORTED={}, " +
145                         "IORING_RECVSEND_BUNDLE_SUPPORTED={}, " +
146                         "REGISTER_IOWQ_MAX_WORKERS_SUPPORTED={}, " +
147                         "SETUP_SUBMIT_ALL_SUPPORTED={}, " +
148                         "SETUP_SINGLE_ISSUER_SUPPORTED={}, " +
149                         "SETUP_DEFER_TASKRUN_SUPPORTED={}, " +
150                         "REGISTER_BUFFER_RING_SUPPORTED={}, " +
151                         "REGISTER_BUFFER_RING_INC_SUPPORTED={}" +
152                         ")", kernelVersion, socketNonEmptySupported, spliceSupported, acceptSupportNoWait,
153                         acceptMultishotSupported, pollAddMultishotSupported, recvMultishotSupported,
154                         recvsendBundleSupported, registerIowqWorkersSupported, submitAllSupported,
155                         singleIssuerSupported, deferTaskrunSupported,
156                         registerBufferRingSupported, registerBufferRingIncSupported);
157             }
158         }
159         UNAVAILABILITY_CAUSE = cause;
160         IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED = socketNonEmptySupported;
161         IORING_SPLICE_SUPPORTED = spliceSupported;
162         IORING_ACCEPT_NO_WAIT_SUPPORTED = acceptSupportNoWait;
163         IORING_ACCEPT_MULTISHOT_SUPPORTED = acceptMultishotSupported;
164         IORING_RECV_MULTISHOT_SUPPORTED = recvMultishotSupported;
165         IORING_RECVSEND_BUNDLE_SUPPORTED = recvsendBundleSupported;
166         IORING_POLL_ADD_MULTISHOT_SUPPORTED = pollAddMultishotSupported;
167         IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED = registerIowqWorkersSupported;
168         IORING_SETUP_SUBMIT_ALL_SUPPORTED = submitAllSupported;
169         IORING_SETUP_CQ_SIZE_SUPPORTED = setUpCqSizeSupported;
170         IORING_SETUP_SINGLE_ISSUER_SUPPORTED = singleIssuerSupported;
171         IORING_SETUP_DEFER_TASKRUN_SUPPORTED = deferTaskrunSupported;
172         IORING_REGISTER_BUFFER_RING_SUPPORTED = registerBufferRingSupported;
173         IORING_REGISTER_BUFFER_RING_INC_SUPPORTED = registerBufferRingIncSupported;
174 
175         IORING_ACCEPT_MULTISHOT_ENABLED = IORING_ACCEPT_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
176                 "io.netty.iouring.acceptMultiShotEnabled", true);
177         IORING_RECV_MULTISHOT_ENABLED = IORING_RECV_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
178                 "io.netty.iouring.recvMultiShotEnabled", true);
179         // Explicit disable RECVSEND_BUNDLE as there is a know kernel bug that will be fixed in the future:
180         // See https://lore.kernel.org/io-uring/[email protected]/
181         //      T/#ma949ad361d376247a16db73e741cb1043e56e6a4
182         IORING_RECVSEND_BUNDLE_ENABLED = IORING_RECVSEND_BUNDLE_SUPPORTED && SystemPropertyUtil.getBoolean(
183                 "io.netty.iouring.recvsendBundleEnabled", false);
184         IORING_POLL_ADD_MULTISHOT_ENABLED = IORING_POLL_ADD_MULTISHOT_SUPPORTED && SystemPropertyUtil.getBoolean(
185                "io.netty.iouring.pollAddMultishotEnabled", true);
186         NUM_ELEMENTS_IOVEC = numElementsIoVec;
187     }
188 
189     public static boolean isAvailable() {
190         return UNAVAILABILITY_CAUSE == null;
191     }
192 
193     /**
194      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
195      * {@linkplain ChannelOption#TCP_FASTOPEN_CONNECT client-side TCP FastOpen}.
196      *
197      * @return {@code true} if it's possible to use client-side TCP FastOpen via io_uring, otherwise {@code false}.
198      */
199     public static boolean isTcpFastOpenClientSideAvailable() {
200         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_CLIENT;
201     }
202 
203     /**
204      * Returns {@code true} if the io_uring native transport is both {@linkplain #isAvailable() available} and supports
205      * {@linkplain ChannelOption#TCP_FASTOPEN server-side TCP FastOpen}.
206      *
207      * @return {@code true} if it's possible to use server-side TCP FastOpen via io_uring, otherwise {@code false}.
208      */
209     public static boolean isTcpFastOpenServerSideAvailable() {
210         return isAvailable() && Native.IS_SUPPORTING_TCP_FASTOPEN_SERVER;
211     }
212 
213     static boolean isCqeFSockNonEmptySupported() {
214         return IORING_CQE_F_SOCK_NONEMPTY_SUPPORTED;
215     }
216 
217     /**
218      * Returns if SPLICE is supported or not.
219      *
220      * @return {@code true} if supported, {@code false} otherwise.
221      */
222     public static boolean isSpliceSupported() {
223         return IORING_SPLICE_SUPPORTED;
224     }
225 
226     static boolean isAcceptNoWaitSupported() {
227         return IORING_ACCEPT_NO_WAIT_SUPPORTED;
228     }
229 
230     static boolean isAcceptMultishotSupported() {
231         return IORING_ACCEPT_MULTISHOT_SUPPORTED;
232     }
233 
234     static boolean isRecvMultishotSupported() {
235         return IORING_RECV_MULTISHOT_SUPPORTED;
236     }
237 
238     static boolean isRecvsendBundleSupported() {
239         return IORING_RECVSEND_BUNDLE_SUPPORTED;
240     }
241 
242     static boolean isPollAddMultishotSupported() {
243         return IORING_POLL_ADD_MULTISHOT_SUPPORTED;
244     }
245 
246     static boolean isRegisterIowqMaxWorkersSupported() {
247         return IORING_REGISTER_IOWQ_MAX_WORKERS_SUPPORTED;
248     }
249 
250     static boolean isSetupCqeSizeSupported() {
251         return IORING_SETUP_CQ_SIZE_SUPPORTED;
252     }
253 
254     static boolean isSetupSubmitAllSupported() {
255         return IORING_SETUP_SUBMIT_ALL_SUPPORTED;
256     }
257 
258     static boolean isSetupSingleIssuerSupported() {
259         return IORING_SETUP_SINGLE_ISSUER_SUPPORTED;
260     }
261 
262     static boolean isSetupDeferTaskrunSupported() {
263         return IORING_SETUP_DEFER_TASKRUN_SUPPORTED;
264     }
265 
266     /**
267      * Returns if it is supported to use a buffer ring.
268      *
269      * @return {@code true} if supported, {@code false} otherwise.
270      */
271     public static boolean isRegisterBufferRingSupported() {
272         return IORING_REGISTER_BUFFER_RING_SUPPORTED;
273     }
274 
275     /**
276      * Returns if it is supported to use an incremental buffer ring.
277      *
278      * @return {@code true} if supported, {@code false} otherwise.
279      */
280     public static boolean isRegisterBufferRingIncSupported() {
281         return IORING_REGISTER_BUFFER_RING_INC_SUPPORTED;
282     }
283 
284     /**
285      * Returns if multi-shot ACCEPT is used or not.
286      *
287      * @return {@code true} if enabled, {@code false} otherwise.
288      */
289     public static boolean isAcceptMultishotEnabled() {
290         return IORING_ACCEPT_MULTISHOT_ENABLED;
291     }
292 
293     /**
294      * Returns if multi-shot RECV is used or not.
295      *
296      * @return {@code true} if enabled, {@code false} otherwise.
297      */
298     public static boolean isRecvMultishotEnabled() {
299         return IORING_RECV_MULTISHOT_ENABLED;
300     }
301 
302     /**
303      * Returns if RECVSEND bundles are used or not.
304      *
305      * @return {@code true} if enabled, {@code false} otherwise.
306      */
307     public static boolean isRecvsendBundleEnabled() {
308         return IORING_RECVSEND_BUNDLE_ENABLED;
309     }
310 
311     /**
312      * Returns if multi-shot POLL_ADD is used or not.
313      *
314      * @return {@code true} if enabled, {@code false} otherwise.
315      */
316     public static boolean isPollAddMultishotEnabled() {
317         return IORING_POLL_ADD_MULTISHOT_ENABLED;
318     }
319 
320     public static void ensureAvailability() {
321         if (UNAVAILABILITY_CAUSE != null) {
322             throw (Error) new UnsatisfiedLinkError(
323                     "failed to load the required native library").initCause(UNAVAILABILITY_CAUSE);
324         }
325     }
326 
327     static long memoryAddress(ByteBuf buffer) {
328         if (buffer.hasMemoryAddress()) {
329             return buffer.memoryAddress();
330         }
331         // Use internalNioBuffer to reduce object creation.
332         // It is important to add the position as the returned ByteBuffer might be shared by multiple ByteBuf
333         // instances and so has an address that starts before the start of the ByteBuf itself.
334         ByteBuffer byteBuffer = buffer.internalNioBuffer(0, buffer.capacity());
335         return Buffer.memoryAddress(byteBuffer) + byteBuffer.position();
336     }
337 
338     public static Throwable unavailabilityCause() {
339         return UNAVAILABILITY_CAUSE;
340     }
341 
342     private IoUring() {
343     }
344 }