1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package io.netty.channel.uring;
17
18 import io.netty.channel.DefaultFileRegion;
19 import io.netty.channel.unix.Buffer;
20 import io.netty.util.internal.ObjectUtil;
21 import io.netty.util.internal.logging.InternalLogger;
22 import io.netty.util.internal.logging.InternalLoggerFactory;
23 import io.netty.channel.unix.FileDescriptor;
24 import io.netty.channel.unix.PeerCredentials;
25 import io.netty.channel.unix.Unix;
26 import io.netty.util.internal.ClassInitializerUtil;
27 import io.netty.util.internal.NativeLibraryLoader;
28 import io.netty.util.internal.PlatformDependent;
29 import io.netty.util.internal.SystemPropertyUtil;
30 import io.netty.util.internal.ThrowableUtil;
31
32 import java.io.File;
33 import java.io.IOException;
34 import java.nio.channels.Selector;
35 import java.nio.file.Path;
36 import java.util.Arrays;
37 import java.util.Locale;
38
39 final class Native {
/** Logger used by the static helpers of this class. */
private static final InternalLogger logger = InternalLoggerFactory.getInstance(Native.class);

// Class initialisation: ensure the JNI library is usable, then register the unix natives.
static {
    // A selector is opened first and kept open until the native library has been handled.
    // NOTE(review): presumably this forces the JDK's own NIO natives to load up front - confirm.
    Selector nioSelector = null;
    try {
        nioSelector = Selector.open();
    } catch (IOException ignore) {
        // Best effort only; carry on without a selector.
    }

    // Try to load the classes listed here before any JNI call is made.
    ClassInitializerUtil.tryLoadClasses(Native.class, PeerCredentials.class, java.io.FileDescriptor.class);

    File tempDirectory = PlatformDependent.tmpdir();
    Path probeFile = tempDirectory.toPath().resolve("netty_io_uring.tmp");
    try {
        // If this native call links, the library has already been loaded by someone else.
        Native.createFile(probeFile.toString());
    } catch (UnsatisfiedLinkError ignore) {
        // Not linked yet, so load the library ourselves.
        loadNativeLibrary();
    } finally {
        // Best-effort cleanup of the probe file; the result is intentionally not checked.
        probeFile.toFile().delete();
        if (nioSelector != null) {
            try {
                nioSelector.close();
            } catch (IOException ignore) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
    Unix.registerInternal(Native::registerUnix);
}
86
87 static final int SOCK_NONBLOCK = NativeStaticallyReferencedJniMethods.sockNonblock();
88 static final int SOCK_CLOEXEC = NativeStaticallyReferencedJniMethods.sockCloexec();
89 static final short AF_INET = (short) NativeStaticallyReferencedJniMethods.afInet();
90 static final short AF_INET6 = (short) NativeStaticallyReferencedJniMethods.afInet6();
91 static final short AF_UNIX = (short) NativeStaticallyReferencedJniMethods.afUnix();
92 static final int SIZEOF_SOCKADDR_STORAGE = NativeStaticallyReferencedJniMethods.sizeofSockaddrStorage();
93 static final int SIZEOF_SOCKADDR_UN = NativeStaticallyReferencedJniMethods.sizeofSockaddrUn();
94 static final int SOCKADDR_UN_OFFSETOF_SUN_FAMILY =
95 NativeStaticallyReferencedJniMethods.sockaddrUnOffsetofSunFamily();
96 static final int SOCKADDR_UN_OFFSETOF_SUN_PATH =
97 NativeStaticallyReferencedJniMethods.sockaddrUnOffsetofSunPath();
98 static final int MAX_SUN_PATH_LEN =
99 NativeStaticallyReferencedJniMethods.maxSunPathLen();
100 static final int SIZEOF_SOCKADDR_IN = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn();
101 static final int SIZEOF_SOCKADDR_IN6 = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn6();
102 static final int SOCKADDR_IN_OFFSETOF_SIN_FAMILY =
103 NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinFamily();
104 static final int SOCKADDR_IN_OFFSETOF_SIN_PORT = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinPort();
105 static final int SOCKADDR_IN_OFFSETOF_SIN_ADDR = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinAddr();
106 static final int IN_ADDRESS_OFFSETOF_S_ADDR = NativeStaticallyReferencedJniMethods.inAddressOffsetofSAddr();
107 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FAMILY =
108 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Family();
109 static final int SOCKADDR_IN6_OFFSETOF_SIN6_PORT =
110 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Port();
111 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FLOWINFO =
112 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Flowinfo();
113 static final int SOCKADDR_IN6_OFFSETOF_SIN6_ADDR =
114 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Addr();
115 static final int SOCKADDR_IN6_OFFSETOF_SIN6_SCOPE_ID =
116 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6ScopeId();
117 static final int IN6_ADDRESS_OFFSETOF_S6_ADDR = NativeStaticallyReferencedJniMethods.in6AddressOffsetofS6Addr();
118 static final int SIZEOF_SIZE_T = NativeStaticallyReferencedJniMethods.sizeofSizeT();
119 static final int SIZEOF_IOVEC = NativeStaticallyReferencedJniMethods.sizeofIovec();
120 static final int CMSG_SPACE = NativeStaticallyReferencedJniMethods.cmsgSpace();
121 static final int CMSG_SPACE_FOR_FD = NativeStaticallyReferencedJniMethods.cmsgSpaceForFd();
122 static final int CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsgLen();
123 static final int CMSG_LEN_FOR_FD = NativeStaticallyReferencedJniMethods.cmsgLenForFd();
124 static final int MSG_CONTROL_LEN_FOR_FD = NativeStaticallyReferencedJniMethods.msgControlLenForFd();
125 static final int CMSG_OFFSETOF_CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLen();
126 static final int CMSG_OFFSETOF_CMSG_LEVEL = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLevel();
127 static final int CMSG_OFFSETOF_CMSG_TYPE = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgType();
128
129 static final int IO_URING_BUFFER_RING_TAIL = NativeStaticallyReferencedJniMethods.ioUringBufferRingOffsetTail();
130
131 static final int IOVEC_OFFSETOF_IOV_BASE = NativeStaticallyReferencedJniMethods.iovecOffsetofIovBase();
132 static final int IOVEC_OFFSETOF_IOV_LEN = NativeStaticallyReferencedJniMethods.iovecOffsetofIovLen();
133 static final int SIZEOF_MSGHDR = NativeStaticallyReferencedJniMethods.sizeofMsghdr();
134 static final int MSGHDR_OFFSETOF_MSG_NAME = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgName();
135 static final int MSGHDR_OFFSETOF_MSG_NAMELEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgNamelen();
136 static final int MSGHDR_OFFSETOF_MSG_IOV = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIov();
137 static final int MSGHDR_OFFSETOF_MSG_IOVLEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIovlen();
138 static final int MSGHDR_OFFSETOF_MSG_CONTROL = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControl();
139 static final int MSGHDR_OFFSETOF_MSG_CONTROLLEN =
140 NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControllen();
141 static final int MSGHDR_OFFSETOF_MSG_FLAGS = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgFlags();
142 static final int POLLIN = NativeStaticallyReferencedJniMethods.pollin();
143 static final int POLLOUT = NativeStaticallyReferencedJniMethods.pollout();
144 static final int POLLRDHUP = NativeStaticallyReferencedJniMethods.pollrdhup();
145 static final int ERRNO_ECANCELED_NEGATIVE = -NativeStaticallyReferencedJniMethods.ecanceled();
146 static final int ERRNO_ETIME_NEGATIVE = -NativeStaticallyReferencedJniMethods.etime();
147 static final int ERRNO_NOBUFS_NEGATIVE = -NativeStaticallyReferencedJniMethods.enobufs();
148
149 static final int PAGE_SIZE = NativeStaticallyReferencedJniMethods.pageSize();
150 static final int MAX_SKB_FRAGS = NativeStaticallyReferencedJniMethods.maxSkbFrags();
151
152 static final int SIZEOF_IOURING_BUF = NativeStaticallyReferencedJniMethods.sizeofIoUringBuf();
153 static final int IOURING_BUFFER_OFFSETOF_ADDR = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetAddr();
154 static final int IOURING_BUFFER_OFFSETOF_LEN = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetLen();
155 static final int IOURING_BUFFER_OFFSETOF_BID = NativeStaticallyReferencedJniMethods.ioUringBufferOffsetBid();
156
157
158
159
160 static final byte IORING_OP_NOP = 0;
161 static final byte IORING_OP_READV = 1;
162 static final byte IORING_OP_WRITEV = 2;
163 static final byte IORING_OP_FSYNC = 3;
164 static final byte IORING_OP_READ_FIXED = 4;
165 static final byte IORING_OP_WRITE_FIXED = 5;
166 static final byte IORING_OP_POLL_ADD = 6;
167 static final byte IORING_OP_POLL_REMOVE = 7;
168 static final byte IORING_OP_SYNC_FILE_RANGE = 8;
169 static final byte IORING_OP_SENDMSG = 9;
170 static final byte IORING_OP_RECVMSG = 10;
171 static final byte IORING_OP_TIMEOUT = 11;
172 static final byte IORING_OP_TIMEOUT_REMOVE = 12;
173 static final byte IORING_OP_ACCEPT = 13;
174 static final byte IORING_OP_ASYNC_CANCEL = 14;
175 static final byte IORING_OP_LINK_TIMEOUT = 15;
176 static final byte IORING_OP_CONNECT = 16;
177 static final byte IORING_OP_FALLOCATE = 17;
178 static final byte IORING_OP_OPENAT = 18;
179 static final byte IORING_OP_CLOSE = 19;
180 static final byte IORING_OP_FILES_UPDATE = 20;
181 static final byte IORING_OP_STATX = 21;
182 static final byte IORING_OP_READ = 22;
183 static final byte IORING_OP_WRITE = 23;
184 static final byte IORING_OP_FADVISE = 24;
185 static final byte IORING_OP_MADVISE = 25;
186 static final byte IORING_OP_SEND = 26;
187 static final byte IORING_OP_RECV = 27;
188 static final byte IORING_OP_OPENAT2 = 28;
189 static final byte IORING_OP_EPOLL_CTL = 29;
190 static final byte IORING_OP_SPLICE = 30;
191 static final byte IORING_OP_PROVIDE_BUFFERS = 31;
192 static final byte IORING_OP_REMOVE_BUFFERS = 32;
193 static final byte IORING_OP_TEE = 33;
194 static final byte IORING_OP_SHUTDOWN = 34;
195 static final byte IORING_OP_RENAMEAT = 35;
196 static final byte IORING_OP_UNLINKAT = 36;
197 static final byte IORING_OP_MKDIRAT = 37;
198 static final byte IORING_OP_SYMLINKAT = 38;
199 static final byte IORING_OP_LINKAT = 39;
200 static final byte IORING_OP_MSG_RING = 40;
201 static final byte IORING_OP_FSETXATTR = 41;
202 static final byte IORING_OP_SETXATTR = 42;
203 static final byte IORING_OP_FGETXATTR = 43;
204 static final byte IORING_OP_GETXATTR = 44;
205 static final byte IORING_OP_SOCKET = 45;
206 static final byte IORING_OP_URING_CMD = 46;
207 static final byte IORING_OP_SEND_ZC = 47;
208 static final byte IORING_OP_SENDMSG_ZC = 48;
209 static final byte IORING_OP_READ_MULTISHOT = 49;
210 static final byte IORING_OP_WAITID = 50;
211 static final byte IORING_OP_FUTEX_WAIT = 51;
212 static final byte IORING_OP_FUTEX_WAKE = 52;
213 static final byte IORING_OP_FUTEX_WAITV = 53;
214 static final byte IORING_OP_FIXED_FD_INSTALL = 54;
215 static final byte IORING_OP_FTRUNCATE = 55;
216 static final byte IORING_OP_BIND = 56;
217 static final int IORING_CQE_F_BUFFER = 1 << 0;
218 static final int IORING_CQE_F_MORE = 1 << 1;
219 static final int IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
220 static final int IORING_CQE_F_NOTIF = 1 << 3;
221 static final int IORING_CQE_F_BUF_MORE = 1 << 4;
222 static final int IORING_CQE_F_SKIP = 1 << 5;
223 static final int IORING_CQE_F_32 = 1 << 15;
224 static final int IORING_SETUP_CQSIZE = 1 << 3;
225 static final int IORING_SETUP_CLAMP = 1 << 4;
226
227 static final int IORING_SETUP_R_DISABLED = 1 << 6;
228 static final int IORING_SETUP_SUBMIT_ALL = 1 << 7;
229 static final int IORING_SETUP_CQE32 = 1 << 11;
230
231 static final int IORING_SETUP_SINGLE_ISSUER = 1 << 12;
232 static final int IORING_SETUP_DEFER_TASKRUN = 1 << 13;
233 static final int IORING_SETUP_NO_SQARRAY = 1 << 16;
234 static final int IORING_SETUP_CQE_MIXED = 1 << 18;
235 static final int IORING_CQE_BUFFER_SHIFT = 16;
236
237 static final short IORING_POLL_ADD_MULTI = 1 << 0;
238
239 static final short IORING_RECVSEND_POLL_FIRST = 1 << 0;
240 static final short IORING_RECVSEND_BUNDLE = 1 << 4;
241 static final short IORING_RECV_MULTISHOT = 1 << 1;
242 static final short IORING_SEND_ZC_REPORT_USAGE = 1 << 3;
243
244 static final int IORING_NOTIF_USAGE_ZC_COPIED = 1 << 31;
245
246 static final short IORING_ACCEPT_MULTISHOT = 1 << 0;
247 static final short IORING_ACCEPT_DONTWAIT = 1 << 1;
248 static final short IORING_ACCEPT_POLL_FIRST = 1 << 2;
249
250 static final short IORING_NOP_CQE32 = 1 << 5;
251
252 static final int IORING_FEAT_NODROP = 1 << 1;
253 static final int IORING_FEAT_SUBMIT_STABLE = 1 << 2;
254 static final int IORING_FEAT_RECVSEND_BUNDLE = 1 << 14;
255
256 static final int IORING_SQ_NEED_WAKEUP = 1 << 0;
257 static final int IORING_SQ_CQ_OVERFLOW = 1 << 1;
258 static final int IORING_SQ_TASKRUN = 1 << 2;
259
260 static final int SPLICE_F_MOVE = 1;
261
262 static final int IOU_PBUF_RING_INC = 2;
263 static final int IO_URING_OP_SUPPORTED = 1;
264
265 static final int CQE_SIZE = 16;
266 static final int CQE32_SIZE = 32;
267
268 static String opToStr(byte op) {
269 switch (op) {
270 case IORING_OP_NOP: return "NOP";
271 case IORING_OP_READV: return "READV";
272 case IORING_OP_WRITEV: return "WRITEV";
273 case IORING_OP_FSYNC: return "FSYNC";
274 case IORING_OP_READ_FIXED: return "READ_FIXED";
275 case IORING_OP_WRITE_FIXED: return "WRITE_FIXED";
276 case IORING_OP_POLL_ADD: return "POLL_ADD";
277 case IORING_OP_POLL_REMOVE: return "POLL_REMOVE";
278 case IORING_OP_SYNC_FILE_RANGE: return "SYNC_FILE_RANGE";
279 case IORING_OP_SENDMSG: return "SENDMSG";
280 case IORING_OP_RECVMSG: return "RECVMSG";
281 case IORING_OP_TIMEOUT: return "TIMEOUT";
282 case IORING_OP_TIMEOUT_REMOVE: return "TIMEOUT_REMOVE";
283 case IORING_OP_ACCEPT: return "ACCEPT";
284 case IORING_OP_ASYNC_CANCEL: return "ASYNC_CANCEL";
285 case IORING_OP_LINK_TIMEOUT: return "LINK_TIMEOUT";
286 case IORING_OP_CONNECT: return "CONNECT";
287 case IORING_OP_FALLOCATE: return "FALLOCATE";
288 case IORING_OP_OPENAT: return "OPENAT";
289 case IORING_OP_CLOSE: return "CLOSE";
290 case IORING_OP_FILES_UPDATE: return "FILES_UPDATE";
291 case IORING_OP_STATX: return "STATX";
292 case IORING_OP_READ: return "READ";
293 case IORING_OP_WRITE: return "WRITE";
294 case IORING_OP_FADVISE: return "FADVISE";
295 case IORING_OP_MADVISE: return "MADVISE";
296 case IORING_OP_SEND: return "SEND";
297 case IORING_OP_RECV: return "RECV";
298 case IORING_OP_OPENAT2: return "OPENAT2";
299 case IORING_OP_EPOLL_CTL: return "EPOLL_CTL";
300 case IORING_OP_SPLICE: return "SPLICE";
301 case IORING_OP_PROVIDE_BUFFERS: return "PROVIDE_BUFFERS";
302 case IORING_OP_REMOVE_BUFFERS: return "REMOVE_BUFFERS";
303 case IORING_OP_TEE: return "TEE";
304 case IORING_OP_SHUTDOWN: return "SHUTDOWN";
305 case IORING_OP_RENAMEAT: return "RENAMEAT";
306 case IORING_OP_UNLINKAT: return "UNLINKAT";
307 case IORING_OP_MKDIRAT: return "MKDIRAT";
308 case IORING_OP_SYMLINKAT: return "SYMLINKAT";
309 case IORING_OP_LINKAT: return "LINKAT";
310 case IORING_OP_SEND_ZC: return "SEND_ZC";
311 case IORING_OP_SENDMSG_ZC: return "SENDMSG_ZC";
312 default: return "[OP CODE " + op + ']';
313 }
314 }
315
316 static final int IORING_ENTER_GETEVENTS = NativeStaticallyReferencedJniMethods.ioringEnterGetevents();
317 static final int IORING_ENTER_REGISTERED_RING = 1 << 4;
318 static final int IOSQE_ASYNC = NativeStaticallyReferencedJniMethods.iosqeAsync();
319 static final int IOSQE_LINK = NativeStaticallyReferencedJniMethods.iosqeLink();
320 static final int IOSQE_IO_DRAIN = NativeStaticallyReferencedJniMethods.iosqeDrain();
321 static final int IOSQE_BUFFER_SELECT = NativeStaticallyReferencedJniMethods.iosqeBufferSelect();
322 static final int IOSQE_CQE_SKIP_SUCCESS = 1 << 6;
323 static final int MSG_DONTWAIT = NativeStaticallyReferencedJniMethods.msgDontwait();
324 static final int MSG_FASTOPEN = NativeStaticallyReferencedJniMethods.msgFastopen();
325 static final int SOL_UDP = NativeStaticallyReferencedJniMethods.solUdp();
326 static final int SOL_SOCKET = NativeStaticallyReferencedJniMethods.solSocket();
327 static final int UDP_SEGMENT = NativeStaticallyReferencedJniMethods.udpSegment();
328 static final int SCM_RIGHTS = NativeStaticallyReferencedJniMethods.scmRights();
329 private static final int TFO_ENABLED_CLIENT_MASK = 0x1;
330 private static final int TFO_ENABLED_SERVER_MASK = 0x2;
331 private static final int TCP_FASTOPEN_MODE = NativeStaticallyReferencedJniMethods.tcpFastopenMode();
332
333
334
335
336 static final boolean IS_SUPPORTING_TCP_FASTOPEN_CLIENT =
337 (TCP_FASTOPEN_MODE & TFO_ENABLED_CLIENT_MASK) == TFO_ENABLED_CLIENT_MASK;
338
339
340
341
342 static final boolean IS_SUPPORTING_TCP_FASTOPEN_SERVER =
343 (TCP_FASTOPEN_MODE & TFO_ENABLED_SERVER_MASK) == TFO_ENABLED_SERVER_MASK;
344
345 private static final int[] REQUIRED_IORING_OPS = {
346 IORING_OP_POLL_ADD,
347 IORING_OP_TIMEOUT,
348 IORING_OP_ACCEPT,
349 IORING_OP_READ,
350 IORING_OP_WRITE,
351 IORING_OP_POLL_REMOVE,
352 IORING_OP_CONNECT,
353 IORING_OP_CLOSE,
354 IORING_OP_WRITEV,
355 IORING_OP_SENDMSG,
356 IORING_OP_RECVMSG,
357 IORING_OP_ASYNC_CANCEL,
358 IORING_OP_RECV,
359 IORING_OP_NOP,
360 IORING_OP_SHUTDOWN,
361 IORING_OP_SEND
362 };
363
364 static int setupFlags(boolean useSingleIssuer) {
365 int flags = Native.IORING_SETUP_R_DISABLED | Native.IORING_SETUP_CLAMP;
366 if (IoUring.isSetupSubmitAllSupported()) {
367 flags |= Native.IORING_SETUP_SUBMIT_ALL;
368 }
369
370 if (useSingleIssuer) {
371
372 if (IoUring.isSetupSingleIssuerSupported()) {
373 flags |= Native.IORING_SETUP_SINGLE_ISSUER;
374 }
375
376 if (IoUring.isSetupDeferTaskrunSupported()) {
377 flags |= Native.IORING_SETUP_DEFER_TASKRUN;
378 }
379 }
380
381
382 if (IoUring.isIoringSetupNoSqarraySupported()) {
383 flags |= Native.IORING_SETUP_NO_SQARRAY;
384 }
385
386
387 if (IoUring.isSetupCqeMixedSupported()) {
388 flags |= Native.IORING_SETUP_CQE_MIXED;
389 }
390 return flags;
391 }
392
393 static RingBuffer createRingBuffer(int ringSize, int setupFlags) {
394 return createRingBuffer(ringSize, ringSize * 2, setupFlags);
395 }
396
397 static RingBuffer createRingBuffer(int ringSize, int cqeSize, int setupFlags) {
398 ObjectUtil.checkPositive(ringSize, "ringSize");
399 ObjectUtil.checkPositive(cqeSize, "cqeSize");
400 long[] values = ioUringSetup(ringSize, cqeSize, setupFlags);
401 assert values.length == 20;
402 long cqkhead = values[0];
403 long cqktail = values[1];
404 int cqringMask = (int) values[2];
405 int cqringEntries = (int) values[3];
406 long cqkflags = values[4];
407 long cqArrayAddress = values[5];
408 int cqringSize = (int) values[6];
409 long cqringAddress = values[7];
410 int cqringFd = (int) values[8];
411 int cqringCapacity = (int) values[9];
412 int cqeLength = (setupFlags & IORING_SETUP_CQE32) == 0 ? CQE_SIZE : CQE32_SIZE;
413 boolean extraCqeDataNeeded = (setupFlags & (IORING_SETUP_CQE32 | IORING_SETUP_CQE_MIXED)) != 0;
414 CompletionQueue completionQueue = new CompletionQueue(
415 Buffer.wrapMemoryAddressWithNativeOrder(cqkhead, Integer.BYTES),
416 Buffer.wrapMemoryAddressWithNativeOrder(cqktail, Integer.BYTES),
417 cqringMask,
418 cqringEntries,
419 Buffer.wrapMemoryAddressWithNativeOrder(cqkflags, Integer.BYTES),
420 Buffer.wrapMemoryAddressWithNativeOrder(cqArrayAddress, cqringEntries * cqeLength),
421 cqringSize,
422 cqringAddress,
423 cqringFd,
424 cqringCapacity, cqeLength, extraCqeDataNeeded);
425
426 long sqkhead = values[10];
427 long sqktail = values[11];
428 int sqringMask = (int) values[12];
429 int sqringEntries = (int) values[13];
430 long sqkflags = values[14];
431 long sqArrayAddress = values[15];
432 int sqringSize = (int) values[16];
433 long sqringAddress = values[17];
434 int sqringFd = (int) values[18];
435 SubmissionQueue submissionQueue = new SubmissionQueue(
436 Buffer.wrapMemoryAddressWithNativeOrder(sqkhead, Integer.BYTES),
437 Buffer.wrapMemoryAddressWithNativeOrder(sqktail, Integer.BYTES),
438 sqringMask,
439 sqringEntries,
440 Buffer.wrapMemoryAddressWithNativeOrder(sqkflags, Integer.BYTES),
441 Buffer.wrapMemoryAddressWithNativeOrder(sqArrayAddress, sqringEntries * SubmissionQueue.SQE_SIZE),
442 sqringSize,
443 sqringAddress,
444 sqringFd);
445 return new RingBuffer(submissionQueue, completionQueue, (int) values[19]);
446 }
447
448 static void checkAllIOSupported(IoUringProbe probe) {
449 if (!ioUringProbe(probe, REQUIRED_IORING_OPS)) {
450 throw new UnsupportedOperationException("Not all operations are supported: "
451 + Arrays.toString(REQUIRED_IORING_OPS));
452 }
453 }
454
455 static boolean isRecvMultishotSupported() {
456
457 return Native.ioUringSetupSupportsFlags(Native.IORING_SETUP_SINGLE_ISSUER);
458 }
459
460 static boolean isAcceptMultishotSupported(IoUringProbe probe) {
461
462 return ioUringProbe(probe, new int[] { Native.IORING_OP_SOCKET });
463 }
464
465 static boolean isCqeFSockNonEmptySupported(IoUringProbe probe) {
466
467 return ioUringProbe(probe, new int[] { Native.IORING_OP_SOCKET });
468 }
469
470 static boolean isSpliceSupported(IoUringProbe probe) {
471
472 return ioUringProbe(probe, new int[] { Native.IORING_OP_SPLICE });
473 }
474
475 static boolean isPollAddMultiShotSupported(IoUringProbe probe) {
476
477 return isCqeFSockNonEmptySupported(probe);
478 }
479
480 static boolean isSendZcSupported(IoUringProbe probe) {
481
482 return ioUringProbe(probe, new int[] { Native.IORING_OP_SEND_ZC });
483 }
484
485 static boolean isSendmsgZcSupported(IoUringProbe probe) {
486
487 return ioUringProbe(probe, new int[] { Native.IORING_OP_SENDMSG_ZC });
488 }
489
490
491
492
493
494
495 static boolean isRegisterIoWqWorkerSupported(int ringFd) {
496
497 int result = ioUringRegisterIoWqMaxWorkers(ringFd, 0, 0);
498 if (result >= 0) {
499 return true;
500 }
501
502 return false;
503 }
504
505 static boolean isRegisterBufferRingSupported(int ringFd, int flags) {
506 int entries = 2;
507 short bgid = 1;
508 long result = ioUringRegisterBufRing(ringFd, entries, bgid, flags);
509 if (result >= 0) {
510 ioUringUnRegisterBufRing(ringFd, result, entries, bgid);
511 return true;
512 }
513
514 return false;
515 }
516
517 static void checkKernelVersion(String kernelVersion) {
518 boolean enforceKernelVersion = SystemPropertyUtil.getBoolean(
519 "io.netty.transport.iouring.enforceKernelVersion", true);
520 boolean kernelSupported = checkKernelVersion(kernelVersion, 5, 9);
521 if (!kernelSupported) {
522 if (enforceKernelVersion) {
523 throw new UnsupportedOperationException(
524 "you need at least kernel version 5.9, current kernel version: " + kernelVersion);
525 } else {
526 logger.debug("Detected kernel " + kernelVersion + " does not match minimum version of 5.9, " +
527 "trying to use io_uring anyway");
528 }
529 }
530 }
531
532 private static boolean checkKernelVersion(String kernelVersion, int major, int minor) {
533 String[] versionComponents = kernelVersion.split("\\.");
534 if (versionComponents.length < 3) {
535 return false;
536 }
537 int nativeMajor;
538 try {
539 nativeMajor = Integer.parseInt(versionComponents[0]);
540 } catch (NumberFormatException e) {
541 return false;
542 }
543
544 if (nativeMajor < major) {
545 return false;
546 }
547
548 if (nativeMajor > major) {
549 return true;
550 }
551
552 int nativeMinor;
553 try {
554 nativeMinor = Integer.parseInt(versionComponents[1]);
555 } catch (NumberFormatException e) {
556 return false;
557 }
558
559 return nativeMinor >= minor;
560 }
561
562 static final class IoUringProbe {
563 final byte lastOp;
564 final byte opsLen;
565 final IoUringProbeOp[] ops;
566
567 IoUringProbe(int[] values) {
568 int idx = 0;
569 lastOp = (byte) values[idx++];
570 opsLen = (byte) values[idx++];
571 ops = new IoUringProbeOp[opsLen];
572 for (int i = 0; i < opsLen; i++) {
573 ops[i] = new IoUringProbeOp((byte) values[idx++], values[idx++]);
574 }
575 }
576 }
577
578 static class IoUringProbeOp {
579 final byte op;
580 final int flags;
581
582 IoUringProbeOp(byte op, int flags) {
583 this.op = op;
584 this.flags = flags;
585 }
586 }
587
588 static boolean ioUringProbe(IoUringProbe probe, int[] ops) {
589 IoUringProbeOp[] ioUringProbeOps = probe.ops;
590 if (ioUringProbeOps == null) {
591 return false;
592 }
593 for (int op : ops) {
594 if (op > probe.lastOp || (ioUringProbeOps[op].flags & IO_URING_OP_SUPPORTED) == 0) {
595 return false;
596 }
597 }
598 return true;
599 }
600
601 static native boolean ioUringSetupSupportsFlags(int setupFlags);
602 private static native long[] ioUringSetup(int entries, int cqeSize, int setupFlags);
603
604 static IoUringProbe ioUringProbe(int ringfd) {
605 int[] values = ioUringProbe0(ringfd);
606 if (values == null) {
607 return null;
608 }
609 return new IoUringProbe(values);
610 }
611 private static native int[] ioUringProbe0(int ringFd);
612
613 static native int ioUringRegisterIoWqMaxWorkers(int ringFd, int maxBoundedValue, int maxUnboundedValue);
614 static native int ioUringRegisterEnableRings(int ringFd);
615 static native int ioUringRegisterRingFds(int ringFds);
616
617 static native long ioUringRegisterBufRing(int ringFd, int entries, short bufferGroup, int flags);
618 static native int ioUringUnRegisterBufRing(int ringFd, long ioUringBufRingAddr, int entries, short bufferGroupId);
619 static native int ioUringBufRingSize(int entries);
620 static native int ioUringEnter(int ringFd, int toSubmit, int minComplete, int flags);
621
622 static native void eventFdWrite(int fd, long value);
623
624 static int getFd(DefaultFileRegion fileChannel) {
625 return getFd0(fileChannel);
626 }
627
628 private static native int getFd0(Object fileChannel);
629
630 static FileDescriptor newBlockingEventFd() {
631 return new FileDescriptor(blockingEventFd());
632 }
633
634 static native void ioUringExit(long submissionQueueArrayAddress, int submissionQueueRingEntries,
635 long submissionQueueRingAddress, int submissionQueueRingSize,
636 long completionQueueRingAddress, int completionQueueRingSize,
637 int ringFd, int enterRingFd);
638
639 private static native int blockingEventFd();
640
641
642 static native int createFile(String name);
643
644 private static native int registerUnix();
645
646 static native long cmsghdrData(long hdrAddr);
647
648 static native String kernelVersion();
649
/** Not instantiable: this class only exposes static members. */
private Native() { }
653
654
655 private static void loadNativeLibrary() {
656 String name = PlatformDependent.normalizedOs().toLowerCase(Locale.ROOT).trim();
657 if (!name.startsWith("linux")) {
658 throw new IllegalStateException("Only supported on Linux");
659 }
660 String staticLibName = "netty_transport_native_io_uring42";
661 String sharedLibName = staticLibName + '_' + PlatformDependent.normalizedArch();
662 ClassLoader cl = PlatformDependent.getClassLoader(Native.class);
663 try {
664 NativeLibraryLoader.load(sharedLibName, cl);
665 } catch (UnsatisfiedLinkError e1) {
666 try {
667 NativeLibraryLoader.load(staticLibName, cl);
668 logger.info("Failed to load io_uring");
669 } catch (UnsatisfiedLinkError e2) {
670 ThrowableUtil.addSuppressed(e1, e2);
671 throw e1;
672 }
673 }
674 }
675 }