View Javadoc
1   /*
2    * Copyright 2024 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.channel.uring;
17  
18  import io.netty.channel.DefaultFileRegion;
19  import io.netty.channel.unix.Buffer;
20  import io.netty.util.internal.ObjectUtil;
21  import io.netty.util.internal.logging.InternalLogger;
22  import io.netty.util.internal.logging.InternalLoggerFactory;
23  import io.netty.channel.unix.FileDescriptor;
24  import io.netty.channel.unix.PeerCredentials;
25  import io.netty.channel.unix.Unix;
26  import io.netty.util.internal.ClassInitializerUtil;
27  import io.netty.util.internal.NativeLibraryLoader;
28  import io.netty.util.internal.PlatformDependent;
29  import io.netty.util.internal.SystemPropertyUtil;
30  import io.netty.util.internal.ThrowableUtil;
31  
32  import java.io.File;
33  import java.io.IOException;
34  import java.nio.channels.Selector;
35  import java.nio.file.Path;
36  import java.util.Arrays;
37  import java.util.Locale;
38  
39  final class Native {
40      private static final InternalLogger logger = InternalLoggerFactory.getInstance(Native.class);
41  
    // Class initializer. In order it: (1) opens a Selector so the JDK's IOUtil gets loaded, (2) preloads the
    // classes the JNI OnLoad(...) hook looks up, (3) probes for / loads the native library and
    // (4) hands Native::registerUnix to Unix.registerInternal(...).
    // The ordering is deliberate (see the linked issues below); do not reorder these steps casually.
    static {
        Selector selector = null;
        try {
            // We call Selector.open() as this will under the hood cause IOUtil to be loaded.
            // This is a workaround for a possible classloader deadlock that could happen otherwise:
            //
            // See https://github.com/netty/netty/issues/10187
            selector = Selector.open();
        } catch (IOException ignore) {
            // Just ignore
        }

        // Preload all classes that will be used in the OnLoad(...) function of JNI to eliminate the
        // possibility of a class-loader deadlock. This is a workaround for
        // https://github.com/netty/netty/issues/11209.

        // This needs to match all the classes that are loaded via NETTY_JNI_UTIL_LOAD_CLASS or looked up via
        // NETTY_JNI_UTIL_FIND_CLASS.
        ClassInitializerUtil.tryLoadClasses(
                Native.class,
                // netty_io_uring_linuxsocket
                PeerCredentials.class, java.io.FileDescriptor.class
        );

        // Path handed to the probe call below; it is deleted again in the finally block.
        File tmpDir = PlatformDependent.tmpdir();
        Path tmpFile = tmpDir.toPath().resolve("netty_io_uring.tmp");
        try {
            // First, try calling a side-effect free JNI method to see if the library was already
            // loaded by the application.
            Native.createFile(tmpFile.toString());
        } catch (UnsatisfiedLinkError ignore) {
            // The library was not previously loaded, load it now.
            loadNativeLibrary();
        } finally {
            // Runs on every path: remove the probe file (the result of delete() is intentionally not checked)
            // and close the Selector that was only opened for its class-loading side effect.
            tmpFile.toFile().delete();
            try {
                if (selector != null) {
                    selector.close();
                }
            } catch (IOException ignore) {
                // Just ignore
            }
        }
        Unix.registerInternal(Native::registerUnix);
    }
86  
    // Every constant in this group is read once, at class-initialization time, from
    // NativeStaticallyReferencedJniMethods instead of being hard-coded in Java.
    // NOTE(review): judging by the names these are platform values (socket flags, address families,
    // struct sizes and field offsets) -- confirm against the native sources.

    // Socket flags and address families (the AF_* results are narrowed to short).
    static final int SOCK_NONBLOCK = NativeStaticallyReferencedJniMethods.sockNonblock();
    static final int SOCK_CLOEXEC = NativeStaticallyReferencedJniMethods.sockCloexec();
    static final short AF_INET = (short) NativeStaticallyReferencedJniMethods.afInet();
    static final short AF_INET6 = (short) NativeStaticallyReferencedJniMethods.afInet6();
    static final short AF_UNIX = (short) NativeStaticallyReferencedJniMethods.afUnix();

    // SIZEOF_* / *_OFFSETOF_* values for the sockaddr family of structs.
    static final int SIZEOF_SOCKADDR_STORAGE = NativeStaticallyReferencedJniMethods.sizeofSockaddrStorage();
    static final int SIZEOF_SOCKADDR_UN = NativeStaticallyReferencedJniMethods.sizeofSockaddrUn();
    static final int SOCKADDR_UN_OFFSETOF_SUN_FAMILY =
            NativeStaticallyReferencedJniMethods.sockaddrUnOffsetofSunFamily();
    static final int SOCKADDR_UN_OFFSETOF_SUN_PATH =
            NativeStaticallyReferencedJniMethods.sockaddrUnOffsetofSunPath();
    static final int MAX_SUN_PATH_LEN =
            NativeStaticallyReferencedJniMethods.maxSunPathLen();
    static final int SIZEOF_SOCKADDR_IN = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn();
    static final int SIZEOF_SOCKADDR_IN6 = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn6();
    static final int SOCKADDR_IN_OFFSETOF_SIN_FAMILY =
            NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinFamily();
    static final int SOCKADDR_IN_OFFSETOF_SIN_PORT = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinPort();
    static final int SOCKADDR_IN_OFFSETOF_SIN_ADDR = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinAddr();
    static final int IN_ADDRESS_OFFSETOF_S_ADDR = NativeStaticallyReferencedJniMethods.inAddressOffsetofSAddr();
    static final int SOCKADDR_IN6_OFFSETOF_SIN6_FAMILY =
            NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Family();
    static final int SOCKADDR_IN6_OFFSETOF_SIN6_PORT =
            NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Port();
    static final int SOCKADDR_IN6_OFFSETOF_SIN6_FLOWINFO =
            NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Flowinfo();
    static final int SOCKADDR_IN6_OFFSETOF_SIN6_ADDR =
            NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Addr();
    static final int SOCKADDR_IN6_OFFSETOF_SIN6_SCOPE_ID =
            NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6ScopeId();
    static final int IN6_ADDRESS_OFFSETOF_S6_ADDR = NativeStaticallyReferencedJniMethods.in6AddressOffsetofS6Addr();

    // size_t / iovec sizes followed by the control-message (cmsg) sizes and cmsghdr field offsets.
    static final int SIZEOF_SIZE_T = NativeStaticallyReferencedJniMethods.sizeofSizeT();
    static final int SIZEOF_IOVEC = NativeStaticallyReferencedJniMethods.sizeofIovec();
    static final int CMSG_SPACE = NativeStaticallyReferencedJniMethods.cmsgSpace();
    static final int CMSG_SPACE_FOR_FD = NativeStaticallyReferencedJniMethods.cmsgSpaceForFd();
    static final int CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsgLen();
    static final int CMSG_LEN_FOR_FD = NativeStaticallyReferencedJniMethods.cmsgLenForFd();
    static final int MSG_CONTROL_LEN_FOR_FD = NativeStaticallyReferencedJniMethods.msgControlLenForFd();
    static final int CMSG_OFFSETOF_CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLen();
    static final int CMSG_OFFSETOF_CMSG_LEVEL = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLevel();
    static final int CMSG_OFFSETOF_CMSG_TYPE = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgType();
128 
    // Like the group above, all of these values come from NativeStaticallyReferencedJniMethods at
    // class-initialization time.
    static final int IO_URING_BUFFER_RING_TAIL = NativeStaticallyReferencedJniMethods.ioUringBufferRingOffsetTail();

    // iovec / msghdr sizes and field offsets.
    static final int IOVEC_OFFSETOF_IOV_BASE = NativeStaticallyReferencedJniMethods.iovecOffsetofIovBase();
    static final int IOVEC_OFFSETOF_IOV_LEN = NativeStaticallyReferencedJniMethods.iovecOffsetofIovLen();
    static final int SIZEOF_MSGHDR = NativeStaticallyReferencedJniMethods.sizeofMsghdr();
    static final int MSGHDR_OFFSETOF_MSG_NAME = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgName();
    static final int MSGHDR_OFFSETOF_MSG_NAMELEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgNamelen();
    static final int MSGHDR_OFFSETOF_MSG_IOV = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIov();
    static final int MSGHDR_OFFSETOF_MSG_IOVLEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIovlen();
    static final int MSGHDR_OFFSETOF_MSG_CONTROL = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControl();
    static final int MSGHDR_OFFSETOF_MSG_CONTROLLEN =
            NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControllen();
    static final int MSGHDR_OFFSETOF_MSG_FLAGS = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgFlags();
    static final int POLLIN = NativeStaticallyReferencedJniMethods.pollin();
    static final int POLLOUT = NativeStaticallyReferencedJniMethods.pollout();
    static final int POLLRDHUP = NativeStaticallyReferencedJniMethods.pollrdhup();
    // The errno values are stored negated (note the leading minus sign).
    // NOTE(review): presumably so they can be compared directly with negative result codes -- confirm at call sites.
    static final int ERRNO_ECANCELED_NEGATIVE = -NativeStaticallyReferencedJniMethods.ecanceled();
    static final int ERRNO_ETIME_NEGATIVE = -NativeStaticallyReferencedJniMethods.etime();
    static final int ERRNO_NOBUFS_NEGATIVE = -NativeStaticallyReferencedJniMethods.enobufs();

    static final int PAGE_SIZE = NativeStaticallyReferencedJniMethods.pageSize();
    static final int MAX_SKB_FRAGS = NativeStaticallyReferencedJniMethods.maxSkbFrags();

    // Size and field offsets reported by native code for an io_uring buffer entry.
    static final int SIZEOF_IOURING_BUF = NativeStaticallyReferencedJniMethods.sizeofIoUringBuf();
    static final int IOURING_BUFFER_OFFSETOF_ADDR = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetAddr();
    static final int IOURING_BUFFER_OFFSETOF_LEN = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetLen();
    static final int IOURING_BUFFER_OFFSETOF_BID = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetBid();
156 
    // These constants must be defined to have the same numeric value as their corresponding
    // ordinal in the enum defined in the io_uring.h header file.
    // DO NOT CHANGE THESE VALUES!
    //
    // The values are also used as array indices by ioUringProbe(IoUringProbe, int[]) and as case labels by
    // opToStr(byte), so they have to stay compile-time constants.
    static final byte IORING_OP_NOP = 0; // Specified by IORING_OP_NOP in io_uring.h
    static final byte IORING_OP_READV = 1; // Specified by IORING_OP_READV in io_uring.h
    static final byte IORING_OP_WRITEV = 2; // Specified by IORING_OP_WRITEV in io_uring.h
    static final byte IORING_OP_FSYNC = 3; // Specified by IORING_OP_FSYNC in io_uring.h
    static final byte IORING_OP_READ_FIXED = 4; // Specified by IORING_OP_READ_FIXED in io_uring.h
    static final byte IORING_OP_WRITE_FIXED = 5; // Specified by IORING_OP_WRITE_FIXED in io_uring.h
    static final byte IORING_OP_POLL_ADD = 6; // Specified by IORING_OP_POLL_ADD in io_uring.h
    static final byte IORING_OP_POLL_REMOVE = 7; // Specified by IORING_OP_POLL_REMOVE in io_uring.h
    static final byte IORING_OP_SYNC_FILE_RANGE = 8; // Specified by IORING_OP_SYNC_FILE_RANGE in io_uring.h
    static final byte IORING_OP_SENDMSG = 9; // Specified by IORING_OP_SENDMSG in io_uring.h
    static final byte IORING_OP_RECVMSG = 10; // Specified by IORING_OP_RECVMSG in io_uring.h
    static final byte IORING_OP_TIMEOUT = 11; // Specified by IORING_OP_TIMEOUT in io_uring.h
    static final byte IORING_OP_TIMEOUT_REMOVE = 12; // Specified by IORING_OP_TIMEOUT_REMOVE in io_uring.h
    static final byte IORING_OP_ACCEPT = 13; // Specified by IORING_OP_ACCEPT in io_uring.h
    static final byte IORING_OP_ASYNC_CANCEL = 14; // Specified by IORING_OP_ASYNC_CANCEL in io_uring.h
    static final byte IORING_OP_LINK_TIMEOUT = 15; // Specified by IORING_OP_LINK_TIMEOUT in io_uring.h
    static final byte IORING_OP_CONNECT = 16; // Specified by IORING_OP_CONNECT in io_uring.h
    static final byte IORING_OP_FALLOCATE = 17; // Specified by IORING_OP_FALLOCATE in io_uring.h
    static final byte IORING_OP_OPENAT = 18; // Specified by IORING_OP_OPENAT in io_uring.h
    static final byte IORING_OP_CLOSE = 19; // Specified by IORING_OP_CLOSE in io_uring.h
    static final byte IORING_OP_FILES_UPDATE = 20; // Specified by IORING_OP_FILES_UPDATE in io_uring.h
    static final byte IORING_OP_STATX = 21; // Specified by IORING_OP_STATX in io_uring.h
    static final byte IORING_OP_READ = 22; // Specified by IORING_OP_READ in io_uring.h
    static final byte IORING_OP_WRITE = 23; // Specified by IORING_OP_WRITE in io_uring.h
    static final byte IORING_OP_FADVISE = 24; // Specified by IORING_OP_FADVISE in io_uring.h
    static final byte IORING_OP_MADVISE = 25; // Specified by IORING_OP_MADVISE in io_uring.h
    static final byte IORING_OP_SEND = 26; // Specified by IORING_OP_SEND in io_uring.h
    static final byte IORING_OP_RECV = 27; // Specified by IORING_OP_RECV in io_uring.h
    static final byte IORING_OP_OPENAT2 = 28; // Specified by IORING_OP_OPENAT2 in io_uring.h
    static final byte IORING_OP_EPOLL_CTL = 29; // Specified by IORING_OP_EPOLL_CTL in io_uring.h
    static final byte IORING_OP_SPLICE = 30; // Specified by IORING_OP_SPLICE in io_uring.h
    static final byte IORING_OP_PROVIDE_BUFFERS = 31; // Specified by IORING_OP_PROVIDE_BUFFERS in io_uring.h
    static final byte IORING_OP_REMOVE_BUFFERS = 32; // Specified by IORING_OP_REMOVE_BUFFERS in io_uring.h
    static final byte IORING_OP_TEE = 33; // Specified by IORING_OP_TEE in io_uring.h
    static final byte IORING_OP_SHUTDOWN = 34; // Specified by IORING_OP_SHUTDOWN in io_uring.h
    static final byte IORING_OP_RENAMEAT = 35; // Specified by IORING_OP_RENAMEAT in io_uring.h
    static final byte IORING_OP_UNLINKAT = 36; // Specified by IORING_OP_UNLINKAT in io_uring.h
    static final byte IORING_OP_MKDIRAT = 37; // Specified by IORING_OP_MKDIRAT in io_uring.h
    static final byte IORING_OP_SYMLINKAT = 38; // Specified by IORING_OP_SYMLINKAT in io_uring.h
    static final byte IORING_OP_LINKAT = 39; // Specified by IORING_OP_LINKAT in io_uring.h
    // The remaining opcodes carry no per-line comment in the original.
    // NOTE(review): presumably they follow the same "must match io_uring.h" rule -- confirm against the header.
    static final byte IORING_OP_MSG_RING = 40;
    static final byte IORING_OP_FSETXATTR = 41;
    static final byte IORING_OP_SETXATTR = 42;
    static final byte IORING_OP_FGETXATTR = 43;
    static final byte IORING_OP_GETXATTR = 44;
    static final byte IORING_OP_SOCKET = 45;
    static final byte IORING_OP_URING_CMD = 46;
    static final byte IORING_OP_SEND_ZC = 47;
    static final byte IORING_OP_SENDMSG_ZC = 48;
    static final byte IORING_OP_READ_MULTISHOT = 49;
    static final byte IORING_OP_WAITID = 50;
    static final byte IORING_OP_FUTEX_WAIT = 51;
    static final byte IORING_OP_FUTEX_WAKE = 52;
    static final byte IORING_OP_FUTEX_WAITV = 53;
    static final byte IORING_OP_FIXED_FD_INSTALL = 54;
    static final byte IORING_OP_FTRUNCATE = 55;
    static final byte IORING_OP_BIND = 56;
    // Hard-coded bit flags and sizes. Unlike the JNI-backed constants earlier in this class, these are
    // literals in Java.
    // NOTE(review): presumably each literal mirrors the identically named definition in io_uring.h --
    // verify against the kernel header before changing any of them.

    // IORING_CQE_F_*: completion-queue-entry flag bits.
    static final int IORING_CQE_F_BUFFER = 1 << 0;
    static final int IORING_CQE_F_MORE = 1 << 1;
    static final int IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
    static final int IORING_CQE_F_NOTIF = 1 << 3;
    static final int IORING_CQE_F_BUF_MORE = 1 << 4;
    static final int IORING_CQE_F_SKIP = 1 << 5;
    static final int IORING_CQE_F_32 = 1 << 15;
    // IORING_SETUP_*: ring setup flag bits; setupFlags(boolean) combines several of them and
    // createRingBuffer(int, int, int) inspects CQE32 / CQE_MIXED.
    static final int IORING_SETUP_CQSIZE = 1 << 3;
    static final int IORING_SETUP_CLAMP = 1 << 4;

    static final int IORING_SETUP_R_DISABLED = 1 << 6;
    static final int IORING_SETUP_SUBMIT_ALL = 1 << 7;
    static final int IORING_SETUP_CQE32 = 1 << 11;

    static final int IORING_SETUP_TASKRUN_FLAG = 1 << 9;
    static final int IORING_SETUP_SINGLE_ISSUER = 1 << 12;
    static final int IORING_SETUP_DEFER_TASKRUN = 1 << 13;
    static final int IORING_SETUP_NO_SQARRAY = 1 << 16;
    static final int IORING_SETUP_CQE_MIXED = 1 << 18;
    static final int IORING_CQE_BUFFER_SHIFT = 16;

    static final short IORING_POLL_ADD_MULTI = 1 << 0;

    static final short IORING_RECVSEND_POLL_FIRST = 1 << 0;
    static final short IORING_RECVSEND_BUNDLE = 1 << 4;
    static final short IORING_RECV_MULTISHOT = 1 << 1;
    static final short IORING_SEND_ZC_REPORT_USAGE = 1 << 3;

    // 1 << 31 is the int sign bit, so this constant is negative when read as a Java int.
    static final int IORING_NOTIF_USAGE_ZC_COPIED = 1 << 31;

    static final short IORING_ACCEPT_MULTISHOT = 1 << 0;
    static final short IORING_ACCEPT_DONTWAIT = 1 << 1;
    static final short IORING_ACCEPT_POLL_FIRST = 1 << 2;

    static final short IORING_NOP_CQE32 = 1 << 5;

    static final int IORING_FEAT_NODROP = 1 << 1;
    static final int IORING_FEAT_SUBMIT_STABLE = 1 << 2;
    static final int IORING_FEAT_RECVSEND_BUNDLE = 1 << 14;

    static final int IORING_SQ_NEED_WAKEUP = 1 << 0;
    static final int IORING_SQ_CQ_OVERFLOW = 1 << 1;
    static final int IORING_SQ_TASKRUN = 1 << 2;

    static final int SPLICE_F_MOVE = 1;

    static final int IOU_PBUF_RING_INC = 2;
    // Bit tested by ioUringProbe(IoUringProbe, int[]) on each probed op's flags.
    static final int IO_URING_OP_SUPPORTED = 1;

    // Bytes per completion queue entry; createRingBuffer(int, int, int) picks CQE32_SIZE when
    // IORING_SETUP_CQE32 is set and CQE_SIZE otherwise.
    static final int CQE_SIZE = 16;
    static final int CQE32_SIZE = 32;
268 
269     static String opToStr(byte op) {
270         switch (op) {
271             case IORING_OP_NOP: return "NOP";
272             case IORING_OP_READV: return "READV";
273             case IORING_OP_WRITEV: return "WRITEV";
274             case IORING_OP_FSYNC: return "FSYNC";
275             case IORING_OP_READ_FIXED: return "READ_FIXED";
276             case IORING_OP_WRITE_FIXED: return "WRITE_FIXED";
277             case IORING_OP_POLL_ADD: return "POLL_ADD";
278             case IORING_OP_POLL_REMOVE: return "POLL_REMOVE";
279             case IORING_OP_SYNC_FILE_RANGE: return "SYNC_FILE_RANGE";
280             case IORING_OP_SENDMSG: return "SENDMSG";
281             case IORING_OP_RECVMSG: return "RECVMSG";
282             case IORING_OP_TIMEOUT: return "TIMEOUT";
283             case IORING_OP_TIMEOUT_REMOVE: return "TIMEOUT_REMOVE";
284             case IORING_OP_ACCEPT: return "ACCEPT";
285             case IORING_OP_ASYNC_CANCEL: return "ASYNC_CANCEL";
286             case IORING_OP_LINK_TIMEOUT: return "LINK_TIMEOUT";
287             case IORING_OP_CONNECT: return "CONNECT";
288             case IORING_OP_FALLOCATE: return "FALLOCATE";
289             case IORING_OP_OPENAT: return "OPENAT";
290             case IORING_OP_CLOSE: return "CLOSE";
291             case IORING_OP_FILES_UPDATE: return "FILES_UPDATE";
292             case IORING_OP_STATX: return "STATX";
293             case IORING_OP_READ: return "READ";
294             case IORING_OP_WRITE: return "WRITE";
295             case IORING_OP_FADVISE: return "FADVISE";
296             case IORING_OP_MADVISE: return "MADVISE";
297             case IORING_OP_SEND: return "SEND";
298             case IORING_OP_RECV: return "RECV";
299             case IORING_OP_OPENAT2: return "OPENAT2";
300             case IORING_OP_EPOLL_CTL: return "EPOLL_CTL";
301             case IORING_OP_SPLICE: return "SPLICE";
302             case IORING_OP_PROVIDE_BUFFERS: return "PROVIDE_BUFFERS";
303             case IORING_OP_REMOVE_BUFFERS: return "REMOVE_BUFFERS";
304             case IORING_OP_TEE: return "TEE";
305             case IORING_OP_SHUTDOWN: return "SHUTDOWN";
306             case IORING_OP_RENAMEAT: return "RENAMEAT";
307             case IORING_OP_UNLINKAT: return "UNLINKAT";
308             case IORING_OP_MKDIRAT: return "MKDIRAT";
309             case IORING_OP_SYMLINKAT: return "SYMLINKAT";
310             case IORING_OP_LINKAT: return "LINKAT";
311             case IORING_OP_SEND_ZC: return "SEND_ZC";
312             case IORING_OP_SENDMSG_ZC: return "SENDMSG_ZC";
313             default: return "[OP CODE " + op + ']';
314         }
315     }
316 
    // Mixed group: most values are fetched from NativeStaticallyReferencedJniMethods at class-initialization
    // time; IORING_ENTER_REGISTERED_RING and IOSQE_CQE_SKIP_SUCCESS are hard-coded literals.
    // NOTE(review): the two literals presumably mirror io_uring.h -- confirm against the header.
    static final int IORING_ENTER_GETEVENTS = NativeStaticallyReferencedJniMethods.ioringEnterGetevents();
    static final int IORING_ENTER_REGISTERED_RING = 1 << 4;
    static final int IOSQE_ASYNC = NativeStaticallyReferencedJniMethods.iosqeAsync();
    static final int IOSQE_LINK = NativeStaticallyReferencedJniMethods.iosqeLink();
    static final int IOSQE_IO_DRAIN = NativeStaticallyReferencedJniMethods.iosqeDrain();
    static final int IOSQE_BUFFER_SELECT = NativeStaticallyReferencedJniMethods.iosqeBufferSelect();
    static final int IOSQE_CQE_SKIP_SUCCESS = 1 << 6;
    static final int MSG_DONTWAIT = NativeStaticallyReferencedJniMethods.msgDontwait();
    static final int MSG_FASTOPEN = NativeStaticallyReferencedJniMethods.msgFastopen();
    static final int SOL_UDP = NativeStaticallyReferencedJniMethods.solUdp();
    static final int SOL_SOCKET = NativeStaticallyReferencedJniMethods.solSocket();
    static final int UDP_SEGMENT = NativeStaticallyReferencedJniMethods.udpSegment();
    static final int SCM_RIGHTS = NativeStaticallyReferencedJniMethods.scmRights();
    // Bit masks applied to TCP_FASTOPEN_MODE below: bit 0 selects client mode, bit 1 server mode.
    private static final int TFO_ENABLED_CLIENT_MASK = 0x1;
    private static final int TFO_ENABLED_SERVER_MASK = 0x2;
    private static final int TCP_FASTOPEN_MODE = NativeStaticallyReferencedJniMethods.tcpFastopenMode();
    /**
     * <a href="https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt">tcp_fastopen</a> client mode enabled
     * state. {@code true} when the client bit is set in {@code TCP_FASTOPEN_MODE}.
     */
    static final boolean IS_SUPPORTING_TCP_FASTOPEN_CLIENT =
            (TCP_FASTOPEN_MODE & TFO_ENABLED_CLIENT_MASK) == TFO_ENABLED_CLIENT_MASK;
    /**
     * <a href="https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt">tcp_fastopen</a> server mode enabled
     * state. {@code true} when the server bit is set in {@code TCP_FASTOPEN_MODE}.
     */
    static final boolean IS_SUPPORTING_TCP_FASTOPEN_SERVER =
            (TCP_FASTOPEN_MODE & TFO_ENABLED_SERVER_MASK) == TFO_ENABLED_SERVER_MASK;
345 
    // Opcodes that checkAllIOSupported(IoUringProbe) requires: if the probe does not report every one of
    // them as supported, that method throws an UnsupportedOperationException listing this array.
    private static final int[] REQUIRED_IORING_OPS = {
            IORING_OP_POLL_ADD,
            IORING_OP_TIMEOUT,
            IORING_OP_ACCEPT,
            IORING_OP_READ,
            IORING_OP_WRITE,
            IORING_OP_POLL_REMOVE,
            IORING_OP_CONNECT,
            IORING_OP_CLOSE,
            IORING_OP_WRITEV,
            IORING_OP_SENDMSG,
            IORING_OP_RECVMSG,
            IORING_OP_ASYNC_CANCEL,
            IORING_OP_RECV,
            IORING_OP_NOP,
            IORING_OP_SHUTDOWN,
            IORING_OP_SEND
    };
364 
365     static int setupFlags(boolean useSingleIssuer) {
366         int flags = Native.IORING_SETUP_R_DISABLED | Native.IORING_SETUP_CLAMP;
367         if (IoUring.isSetupSubmitAllSupported()) {
368             flags |= Native.IORING_SETUP_SUBMIT_ALL;
369         }
370 
371         if (useSingleIssuer) {
372             // See https://github.com/axboe/liburing/wiki/io_uring-and-networking-in-2023#task-work
373             if (IoUring.isSetupSingleIssuerSupported()) {
374                 flags |= Native.IORING_SETUP_SINGLE_ISSUER;
375             }
376             // IORING_SETUP_DEFER_TASKRUN also requires IORING_SETUP_SINGLE_ISSUER.
377             if (IoUring.isSetupDeferTaskrunSupported()) {
378                 flags |= Native.IORING_SETUP_DEFER_TASKRUN;
379                 flags |= Native.IORING_SETUP_TASKRUN_FLAG;
380             }
381         }
382         // liburing uses IORING_SETUP_NO_SQARRAY by default these days, we should do the same by default if possible.
383         // See https://github.com/axboe/liburing/releases/tag/liburing-2.6
384         if (IoUring.isIoringSetupNoSqarraySupported()) {
385             flags  |= Native.IORING_SETUP_NO_SQARRAY;
386         }
387 
388         // Use IORING_SETUP_CQE_MIXED by default if supported so we can support any OP in the future.
389         if (IoUring.isSetupCqeMixedSupported()) {
390             flags |= Native.IORING_SETUP_CQE_MIXED;
391         }
392         return flags;
393     }
394 
395     static RingBuffer createRingBuffer(int ringSize, int setupFlags) {
396         return createRingBuffer(ringSize, ringSize * 2, setupFlags);
397     }
398 
399     static RingBuffer createRingBuffer(int ringSize, int cqeSize, int setupFlags) {
400         ObjectUtil.checkPositive(ringSize, "ringSize");
401         ObjectUtil.checkPositive(cqeSize, "cqeSize");
402         long[] values = ioUringSetup(ringSize, cqeSize, setupFlags);
403         assert values.length == 20;
404         long cqkhead = values[0];
405         long cqktail = values[1];
406         int cqringMask = (int) values[2];
407         int cqringEntries = (int) values[3];
408         long cqkflags = values[4];
409         long cqArrayAddress = values[5];
410         int cqringSize = (int) values[6];
411         long cqringAddress = values[7];
412         int cqringFd = (int) values[8];
413         int cqringCapacity = (int) values[9];
414         int cqeLength = (setupFlags & IORING_SETUP_CQE32) == 0 ? CQE_SIZE : CQE32_SIZE;
415         boolean extraCqeDataNeeded = (setupFlags & (IORING_SETUP_CQE32 | IORING_SETUP_CQE_MIXED)) != 0;
416         CompletionQueue completionQueue = new CompletionQueue(
417                 Buffer.wrapMemoryAddressWithNativeOrder(cqkhead, Integer.BYTES),
418                 Buffer.wrapMemoryAddressWithNativeOrder(cqktail, Integer.BYTES),
419                 cqringMask,
420                 cqringEntries,
421                 Buffer.wrapMemoryAddressWithNativeOrder(cqkflags, Integer.BYTES),
422                 Buffer.wrapMemoryAddressWithNativeOrder(cqArrayAddress, cqringEntries * cqeLength),
423                 cqringSize,
424                 cqringAddress,
425                 cqringFd,
426                 cqringCapacity, cqeLength, extraCqeDataNeeded);
427 
428         long sqkhead = values[10];
429         long sqktail = values[11];
430         int sqringMask = (int) values[12];
431         int sqringEntries = (int) values[13];
432         long sqkflags = values[14];
433         long sqArrayAddress = values[15];
434         int sqringSize = (int) values[16];
435         long sqringAddress = values[17];
436         int sqringFd = (int) values[18];
437         SubmissionQueue submissionQueue = new SubmissionQueue(
438                 Buffer.wrapMemoryAddressWithNativeOrder(sqkhead, Integer.BYTES),
439                 Buffer.wrapMemoryAddressWithNativeOrder(sqktail, Integer.BYTES),
440                 sqringMask,
441                 sqringEntries,
442                 Buffer.wrapMemoryAddressWithNativeOrder(sqkflags, Integer.BYTES),
443                 Buffer.wrapMemoryAddressWithNativeOrder(sqArrayAddress, sqringEntries * SubmissionQueue.SQE_SIZE),
444                 sqringSize,
445                 sqringAddress,
446                 sqringFd);
447         return new RingBuffer(submissionQueue, completionQueue, (int) values[19]);
448     }
449 
450     static void checkAllIOSupported(IoUringProbe probe) {
451         if (!ioUringProbe(probe, REQUIRED_IORING_OPS)) {
452             throw new UnsupportedOperationException("Not all operations are supported: "
453                     + Arrays.toString(REQUIRED_IORING_OPS));
454         }
455     }
456 
457     static boolean isRecvMultishotSupported() {
458         // Added in the same release as IORING_SETUP_SINGLE_ISSUER.
459         return Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SINGLE_ISSUER);
460     }
461 
462     static boolean isAcceptMultishotSupported(IoUringProbe probe) {
463         // IORING_OP_SOCKET was added in the same release (5.19);
464         return ioUringProbe(probe, new int[] { Native.IORING_OP_SOCKET });
465     }
466 
467     static boolean isCqeFSockNonEmptySupported(IoUringProbe probe) {
468         // IORING_OP_SOCKET was added in the same release (5.19);
469         return ioUringProbe(probe, new int[] { Native.IORING_OP_SOCKET });
470     }
471 
472     static boolean isSpliceSupported(IoUringProbe probe) {
473         // IORING_OP_SPLICE Available since 5.7
474         return ioUringProbe(probe, new int[] { Native.IORING_OP_SPLICE });
475     }
476 
477     static boolean isPollAddMultiShotSupported(IoUringProbe probe) {
478         // Was added in the same release and we also need this feature to correctly handle edge-triggered mode.
479         return isCqeFSockNonEmptySupported(probe);
480     }
481 
482     static boolean isSendZcSupported(IoUringProbe probe) {
483         // IORING_OP_SEND_ZC Available since 6.0
484         return ioUringProbe(probe, new int[] { Native.IORING_OP_SEND_ZC });
485     }
486 
487     static boolean isSendmsgZcSupported(IoUringProbe probe) {
488         // IORING_OP_SENDMSG_ZC Available since 6.0
489         return ioUringProbe(probe, new int[] { Native.IORING_OP_SENDMSG_ZC });
490     }
491 
492     /**
493      * check current kernel version whether support io_uring_register_io_wq_worker
494      * Available since 5.15.
495      * @return true if support io_uring_register_io_wq_worker
496      */
497     static boolean isRegisterIoWqWorkerSupported(int ringFd) {
498         // See https://github.com/torvalds/linux/blob/v5.5/fs/io_uring.c#L5488C10-L5488C16
499         int result = ioUringRegisterIoWqMaxWorkers(ringFd, 0, 0);
500         if (result >= 0) {
501             return true;
502         }
503         // This is not supported and so will return -EINVAL
504         return false;
505     }
506 
507     static boolean isRegisterBufferRingSupported(int ringFd, int flags) {
508         int entries = 2;
509         short bgid = 1;
510         long result = ioUringRegisterBufRing(ringFd, entries, bgid, flags);
511         if (result >= 0) {
512             ioUringUnRegisterBufRing(ringFd, result, entries, bgid);
513             return true;
514         }
515         // This is not supported and so will return -EINVAL
516         return false;
517     }
518 
519     static void checkKernelVersion(String kernelVersion) {
520         boolean enforceKernelVersion = SystemPropertyUtil.getBoolean(
521                 "io.netty.transport.iouring.enforceKernelVersion", true);
522         boolean kernelSupported = checkKernelVersion(kernelVersion, 5, 9);
523         if (!kernelSupported) {
524             if (enforceKernelVersion) {
525                 throw new UnsupportedOperationException(
526                         "you need at least kernel version 5.9, current kernel version: " + kernelVersion);
527             } else {
528                 logger.debug("Detected kernel " + kernelVersion + " does not match minimum version of 5.9, " +
529                         "trying to use io_uring anyway");
530             }
531         }
532     }
533 
534     private static boolean checkKernelVersion(String kernelVersion, int major, int minor) {
535         String[] versionComponents = kernelVersion.split("\\.");
536         if (versionComponents.length < 3) {
537             return false;
538         }
539         int nativeMajor;
540         try {
541             nativeMajor = Integer.parseInt(versionComponents[0]);
542         } catch (NumberFormatException e) {
543             return false;
544         }
545 
546         if (nativeMajor < major) {
547             return false;
548         }
549 
550         if (nativeMajor > major) {
551             return true;
552         }
553 
554         int nativeMinor;
555         try {
556             nativeMinor = Integer.parseInt(versionComponents[1]);
557         } catch (NumberFormatException e) {
558             return false;
559         }
560 
561         return nativeMinor >= minor;
562     }
563 
564     static final class IoUringProbe {
565         final byte lastOp;
566         final byte opsLen;
567         final IoUringProbeOp[] ops;
568 
569         IoUringProbe(int[] values) {
570             int idx = 0;
571             lastOp = (byte) values[idx++];
572             opsLen = (byte) values[idx++];
573             ops  = new IoUringProbeOp[opsLen];
574             for (int i = 0; i < opsLen; i++) {
575                 ops[i] = new IoUringProbeOp((byte) values[idx++], values[idx++]);
576             }
577         }
578     }
579 
580     static class IoUringProbeOp {
581         final byte op;
582         final int flags;
583 
584         IoUringProbeOp(byte op, int flags) {
585             this.op = op;
586             this.flags = flags;
587         }
588     }
589 
590     static boolean ioUringProbe(IoUringProbe probe, int[] ops) {
591         IoUringProbeOp[] ioUringProbeOps = probe.ops;
592         if (ioUringProbeOps == null) {
593             return false;
594         }
595         for (int op : ops) {
596             if (op > probe.lastOp || (ioUringProbeOps[op].flags & IO_URING_OP_SUPPORTED) == 0) {
597                 return false;
598             }
599         }
600         return true;
601     }
602 
    /**
     * Native method; it has no Java body in this file.
     * NOTE(review): the name suggests it reports whether the given setup flags are supported. Confirm
     * against the native implementation.
     */
    static native boolean ioUringSetupSupportsFlags(int setupFlags);
    /**
     * Native method; it has no Java body in this file.
     * NOTE(review): the layout and meaning of the returned {@code long[]} are not visible here. Confirm
     * against the native implementation and the Java caller.
     */
    private static native long[] ioUringSetup(int entries, int cqeSize, int setupFlags);
605 
606     static IoUringProbe ioUringProbe(int ringfd) {
607         int[] values = ioUringProbe0(ringfd);
608         if (values == null) {
609             return null;
610         }
611         return new IoUringProbe(values);
612     }
    /**
     * Native backend of {@link #ioUringProbe(int)}. The wrapper treats a {@code null} return as
     * "no probe available" and otherwise passes the array to the {@link IoUringProbe} constructor,
     * which reads it as {@code [lastOp, opsLen, op, flags, op, flags, ...]}.
     */
    private static native int[] ioUringProbe0(int ringFd);

    // The native methods below have no Java body in this file.
    // NOTE(review): return-value conventions (for example error codes) are not visible here. Confirm
    // against the native implementation before relying on them.
    static native int ioUringRegisterIoWqMaxWorkers(int ringFd, int maxBoundedValue, int maxUnboundedValue);
    static native int ioUringRegisterEnableRings(int ringFd);
    static native int ioUringRegisterRingFds(int ringFds);

    // NOTE(review): ioUringRegisterBufRing returns a long, and ioUringUnRegisterBufRing takes a
    // long "ioUringBufRingAddr"; presumably the former yields the address the latter expects. Verify.
    static native long ioUringRegisterBufRing(int ringFd, int entries, short bufferGroup, int flags);
    static native int ioUringUnRegisterBufRing(int ringFd, long ioUringBufRingAddr, int entries, short bufferGroupId);
    static native int ioUringBufRingSize(int entries);
    static native int ioUringEnter(int ringFd, int toSubmit, int minComplete, int flags);

    // Native method. NOTE(review): presumably writes {@code value} to the eventfd {@code fd}; confirm.
    static native void eventFdWrite(int fd, long value);
625 
626     static int getFd(DefaultFileRegion fileChannel) {
627         return getFd0(fileChannel);
628     }
629 
    /**
     * Native backend of {@link #getFd(DefaultFileRegion)}. It is declared with an {@code Object}
     * parameter; the only caller in this class passes a {@link DefaultFileRegion}.
     */
    private static native int getFd0(Object fileChannel);
631 
632     static FileDescriptor newBlockingEventFd() {
633         return new FileDescriptor(blockingEventFd());
634     }
635 
    /**
     * Native method; it has no Java body in this file. It takes the addresses and sizes of the submission
     * queue array, submission queue ring and completion queue ring, plus two file descriptors.
     * NOTE(review): presumably releases those mappings and closes the descriptors. Confirm against the
     * native implementation.
     */
    static native void ioUringExit(long submissionQueueArrayAddress, int submissionQueueRingEntries,
                                          long submissionQueueRingAddress, int submissionQueueRingSize,
                                          long completionQueueRingAddress, int completionQueueRingSize,
                                          int ringFd, int enterRingFd);

    /**
     * Native backend of {@link #newBlockingEventFd()}, which wraps the returned {@code int} in a
     * {@link FileDescriptor}.
     */
    private static native int blockingEventFd();
642 
    // for testing only!
    // Native method. NOTE(review): the meaning of the returned int is not visible here; verify.
    static native int createFile(String name);

    // Native method, private to this class. NOTE(review): its caller and the meaning of the returned
    // int are outside the visible part of the file; confirm.
    private static native int registerUnix();

    // Native method. NOTE(review): presumably maps a cmsghdr address to the address of its data; confirm.
    static native long cmsghdrData(long hdrAddr);

    // Native method returning a String. NOTE(review): presumably the running kernel's version; confirm.
    static native String kernelVersion();
651 
    /**
     * Private constructor: this class is used as a static utility holder and is not meant to be
     * instantiated.
     */
    private Native() {
        // utility
    }
655 
656     // From io_uring native library
657     private static void loadNativeLibrary() {
658         String name = PlatformDependent.normalizedOs().toLowerCase(Locale.ROOT).trim();
659         if (!name.startsWith("linux")) {
660             throw new IllegalStateException("Only supported on Linux");
661         }
662         String staticLibName = "netty_transport_native_io_uring42";
663         String sharedLibName = staticLibName + '_' + PlatformDependent.normalizedArch();
664         ClassLoader cl = PlatformDependent.getClassLoader(Native.class);
665         try {
666             NativeLibraryLoader.load(sharedLibName, cl);
667         } catch (UnsatisfiedLinkError e1) {
668             try {
669                 NativeLibraryLoader.load(staticLibName, cl);
670                 logger.info("Failed to load io_uring");
671             } catch (UnsatisfiedLinkError e2) {
672                 ThrowableUtil.addSuppressed(e1, e2);
673                 throw e1;
674             }
675         }
676     }
677 }