1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package io.netty.channel.uring;
17
18 import io.netty.channel.DefaultFileRegion;
19 import io.netty.util.internal.ObjectUtil;
20 import io.netty.util.internal.logging.InternalLogger;
21 import io.netty.util.internal.logging.InternalLoggerFactory;
22 import io.netty.channel.unix.FileDescriptor;
23 import io.netty.channel.unix.PeerCredentials;
24 import io.netty.channel.unix.Unix;
25 import io.netty.util.internal.ClassInitializerUtil;
26 import io.netty.util.internal.NativeLibraryLoader;
27 import io.netty.util.internal.PlatformDependent;
28 import io.netty.util.internal.SystemPropertyUtil;
29 import io.netty.util.internal.ThrowableUtil;
30
31 import java.io.File;
32 import java.io.IOException;
33 import java.nio.channels.Selector;
34 import java.nio.file.Path;
35 import java.util.Arrays;
36 import java.util.Locale;
37
38 final class Native {
39 private static final InternalLogger logger = InternalLoggerFactory.getInstance(Native.class);
40 static final int DEFAULT_RING_SIZE = Math.max(64, SystemPropertyUtil.getInt("io.netty.iouring.ringSize", 4096));
41
42 static {
43 Selector selector = null;
44 try {
45
46
47
48
49 selector = Selector.open();
50 } catch (IOException ignore) {
51
52 }
53
54
55
56
57
58
59 ClassInitializerUtil.tryLoadClasses(
60 Native.class,
61
62 PeerCredentials.class, java.io.FileDescriptor.class
63 );
64
65 File tmpDir = PlatformDependent.tmpdir();
66 Path tmpFile = tmpDir.toPath().resolve("netty_io_uring.tmp");
67 try {
68
69
70 Native.createFile(tmpFile.toString());
71 } catch (UnsatisfiedLinkError ignore) {
72
73 loadNativeLibrary();
74 } finally {
75 tmpFile.toFile().delete();
76 try {
77 if (selector != null) {
78 selector.close();
79 }
80 } catch (IOException ignore) {
81
82 }
83 }
84 Unix.registerInternal(Native::registerUnix);
85 }
86
87 static final int SOCK_NONBLOCK = NativeStaticallyReferencedJniMethods.sockNonblock();
88 static final int SOCK_CLOEXEC = NativeStaticallyReferencedJniMethods.sockCloexec();
89 static final short AF_INET = (short) NativeStaticallyReferencedJniMethods.afInet();
90 static final short AF_INET6 = (short) NativeStaticallyReferencedJniMethods.afInet6();
91 static final int SIZEOF_SOCKADDR_STORAGE = NativeStaticallyReferencedJniMethods.sizeofSockaddrStorage();
92 static final int SIZEOF_SOCKADDR_IN = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn();
93 static final int SIZEOF_SOCKADDR_IN6 = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn6();
94 static final int SOCKADDR_IN_OFFSETOF_SIN_FAMILY =
95 NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinFamily();
96 static final int SOCKADDR_IN_OFFSETOF_SIN_PORT = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinPort();
97 static final int SOCKADDR_IN_OFFSETOF_SIN_ADDR = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinAddr();
98 static final int IN_ADDRESS_OFFSETOF_S_ADDR = NativeStaticallyReferencedJniMethods.inAddressOffsetofSAddr();
99 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FAMILY =
100 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Family();
101 static final int SOCKADDR_IN6_OFFSETOF_SIN6_PORT =
102 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Port();
103 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FLOWINFO =
104 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Flowinfo();
105 static final int SOCKADDR_IN6_OFFSETOF_SIN6_ADDR =
106 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Addr();
107 static final int SOCKADDR_IN6_OFFSETOF_SIN6_SCOPE_ID =
108 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6ScopeId();
109 static final int IN6_ADDRESS_OFFSETOF_S6_ADDR = NativeStaticallyReferencedJniMethods.in6AddressOffsetofS6Addr();
110 static final int SIZEOF_SIZE_T = NativeStaticallyReferencedJniMethods.sizeofSizeT();
111 static final int SIZEOF_IOVEC = NativeStaticallyReferencedJniMethods.sizeofIovec();
112 static final int CMSG_SPACE = NativeStaticallyReferencedJniMethods.cmsgSpace();
113 static final int CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsgLen();
114 static final int CMSG_OFFSETOF_CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLen();
115 static final int CMSG_OFFSETOF_CMSG_LEVEL = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLevel();
116 static final int CMSG_OFFSETOF_CMSG_TYPE = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgType();
117
118 static final int IOVEC_OFFSETOF_IOV_BASE = NativeStaticallyReferencedJniMethods.iovecOffsetofIovBase();
119 static final int IOVEC_OFFSETOF_IOV_LEN = NativeStaticallyReferencedJniMethods.iovecOffsetofIovLen();
120 static final int SIZEOF_MSGHDR = NativeStaticallyReferencedJniMethods.sizeofMsghdr();
121 static final int MSGHDR_OFFSETOF_MSG_NAME = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgName();
122 static final int MSGHDR_OFFSETOF_MSG_NAMELEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgNamelen();
123 static final int MSGHDR_OFFSETOF_MSG_IOV = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIov();
124 static final int MSGHDR_OFFSETOF_MSG_IOVLEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIovlen();
125 static final int MSGHDR_OFFSETOF_MSG_CONTROL = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControl();
126 static final int MSGHDR_OFFSETOF_MSG_CONTROLLEN =
127 NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControllen();
128 static final int MSGHDR_OFFSETOF_MSG_FLAGS = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgFlags();
129 static final int POLLIN = NativeStaticallyReferencedJniMethods.pollin();
130 static final int POLLOUT = NativeStaticallyReferencedJniMethods.pollout();
131 static final int POLLRDHUP = NativeStaticallyReferencedJniMethods.pollrdhup();
132 static final int ERRNO_ECANCELED_NEGATIVE = -NativeStaticallyReferencedJniMethods.ecanceled();
133 static final int ERRNO_ETIME_NEGATIVE = -NativeStaticallyReferencedJniMethods.etime();
134
135
136
137
138 static final byte IORING_OP_NOP = 0;
139 static final byte IORING_OP_READV = 1;
140 static final byte IORING_OP_WRITEV = 2;
141 static final byte IORING_OP_FSYNC = 3;
142 static final byte IORING_OP_READ_FIXED = 4;
143 static final byte IORING_OP_WRITE_FIXED = 5;
144 static final byte IORING_OP_POLL_ADD = 6;
145 static final byte IORING_OP_POLL_REMOVE = 7;
146 static final byte IORING_OP_SYNC_FILE_RANGE = 8;
147 static final byte IORING_OP_SENDMSG = 9;
148 static final byte IORING_OP_RECVMSG = 10;
149 static final byte IORING_OP_TIMEOUT = 11;
150 static final byte IORING_OP_TIMEOUT_REMOVE = 12;
151 static final byte IORING_OP_ACCEPT = 13;
152 static final byte IORING_OP_ASYNC_CANCEL = 14;
153 static final byte IORING_OP_LINK_TIMEOUT = 15;
154 static final byte IORING_OP_CONNECT = 16;
155 static final byte IORING_OP_FALLOCATE = 17;
156 static final byte IORING_OP_OPENAT = 18;
157 static final byte IORING_OP_CLOSE = 19;
158 static final byte IORING_OP_FILES_UPDATE = 20;
159 static final byte IORING_OP_STATX = 21;
160 static final byte IORING_OP_READ = 22;
161 static final byte IORING_OP_WRITE = 23;
162 static final byte IORING_OP_FADVISE = 24;
163 static final byte IORING_OP_MADVISE = 25;
164 static final byte IORING_OP_SEND = 26;
165 static final byte IORING_OP_RECV = 27;
166 static final byte IORING_OP_OPENAT2 = 28;
167 static final byte IORING_OP_EPOLL_CTL = 29;
168 static final byte IORING_OP_SPLICE = 30;
169 static final byte IORING_OP_PROVIDE_BUFFERS = 31;
170 static final byte IORING_OP_REMOVE_BUFFERS = 32;
171 static final byte IORING_OP_TEE = 33;
172 static final byte IORING_OP_SHUTDOWN = 34;
173 static final byte IORING_OP_RENAMEAT = 35;
174 static final byte IORING_OP_UNLINKAT = 36;
175 static final byte IORING_OP_MKDIRAT = 37;
176 static final byte IORING_OP_SYMLINKAT = 38;
177 static final byte IORING_OP_LINKAT = 39;
178 static final byte IORING_OP_MSG_RING = 40;
179 static final byte IORING_OP_FSETXATTR = 41;
180 static final byte IORING_OP_SETXATTR = 42;
181 static final byte IORING_OP_FGETXATTR = 43;
182 static final byte IORING_OP_GETXATTR = 44;
183 static final byte IORING_OP_SOCKET = 45;
184 static final byte IORING_OP_URING_CMD = 46;
185 static final byte IORING_OP_SEND_ZC = 47;
186 static final byte IORING_OP_SENDMSG_ZC = 48;
187 static final byte IORING_OP_READ_MULTISHOT = 49;
188 static final byte IORING_OP_WAITID = 50;
189 static final byte IORING_OP_FUTEX_WAIT = 51;
190 static final byte IORING_OP_FUTEX_WAKE = 52;
191 static final byte IORING_OP_FUTEX_WAITV = 53;
192 static final byte IORING_OP_FIXED_FD_INSTALL = 54;
193 static final byte IORING_OP_FTRUNCATE = 55;
194 static final byte IORING_OP_BIND = 56;
195 static final byte IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
196
197 static final short IORING_RECVSEND_POLL_FIRST = 1 << 0;
198 static final short IORING_ACCEPT_DONTWAIT = 1 << 1;
199 static final short IORING_ACCEPT_POLL_FIRST = 1 << 2;
200 static final int IORING_FEAT_RECVSEND_BUNDLE = 1 << 14;
201 static final int SPLICE_F_MOVE = 1;
202
203 static String opToStr(byte op) {
204 switch (op) {
205 case IORING_OP_NOP: return "NOP";
206 case IORING_OP_READV: return "READV";
207 case IORING_OP_WRITEV: return "WRITEV";
208 case IORING_OP_FSYNC: return "FSYNC";
209 case IORING_OP_READ_FIXED: return "READ_FIXED";
210 case IORING_OP_WRITE_FIXED: return "WRITE_FIXED";
211 case IORING_OP_POLL_ADD: return "POLL_ADD";
212 case IORING_OP_POLL_REMOVE: return "POLL_REMOVE";
213 case IORING_OP_SYNC_FILE_RANGE: return "SYNC_FILE_RANGE";
214 case IORING_OP_SENDMSG: return "SENDMSG";
215 case IORING_OP_RECVMSG: return "RECVMSG";
216 case IORING_OP_TIMEOUT: return "TIMEOUT";
217 case IORING_OP_TIMEOUT_REMOVE: return "TIMEOUT_REMOVE";
218 case IORING_OP_ACCEPT: return "ACCEPT";
219 case IORING_OP_ASYNC_CANCEL: return "ASYNC_CANCEL";
220 case IORING_OP_LINK_TIMEOUT: return "LINK_TIMEOUT";
221 case IORING_OP_CONNECT: return "CONNECT";
222 case IORING_OP_FALLOCATE: return "FALLOCATE";
223 case IORING_OP_OPENAT: return "OPENAT";
224 case IORING_OP_CLOSE: return "CLOSE";
225 case IORING_OP_FILES_UPDATE: return "FILES_UPDATE";
226 case IORING_OP_STATX: return "STATX";
227 case IORING_OP_READ: return "READ";
228 case IORING_OP_WRITE: return "WRITE";
229 case IORING_OP_FADVISE: return "FADVISE";
230 case IORING_OP_MADVISE: return "MADVISE";
231 case IORING_OP_SEND: return "SEND";
232 case IORING_OP_RECV: return "RECV";
233 case IORING_OP_OPENAT2: return "OPENAT2";
234 case IORING_OP_EPOLL_CTL: return "EPOLL_CTL";
235 case IORING_OP_SPLICE: return "SPLICE";
236 case IORING_OP_PROVIDE_BUFFERS: return "PROVIDE_BUFFERS";
237 case IORING_OP_REMOVE_BUFFERS: return "REMOVE_BUFFERS";
238 case IORING_OP_TEE: return "TEE";
239 case IORING_OP_SHUTDOWN: return "SHUTDOWN";
240 case IORING_OP_RENAMEAT: return "RENAMEAT";
241 case IORING_OP_UNLINKAT: return "UNLINKAT";
242 case IORING_OP_MKDIRAT: return "MKDIRAT";
243 case IORING_OP_SYMLINKAT: return "SYMLINKAT";
244 case IORING_OP_LINKAT: return "LINKAT";
245 default: return "[OP CODE " + op + ']';
246 }
247 }
248
249 static final int IORING_ENTER_GETEVENTS = NativeStaticallyReferencedJniMethods.ioringEnterGetevents();
250 static final int IOSQE_ASYNC = NativeStaticallyReferencedJniMethods.iosqeAsync();
251 static final int IOSQE_LINK = NativeStaticallyReferencedJniMethods.iosqeLink();
252 static final int IOSQE_IO_DRAIN = NativeStaticallyReferencedJniMethods.iosqeDrain();
253 static final int MSG_DONTWAIT = NativeStaticallyReferencedJniMethods.msgDontwait();
254 static final int MSG_FASTOPEN = NativeStaticallyReferencedJniMethods.msgFastopen();
255 static final int SOL_UDP = NativeStaticallyReferencedJniMethods.solUdp();
256 static final int UDP_SEGMENT = NativeStaticallyReferencedJniMethods.udpSegment();
257 private static final int TFO_ENABLED_CLIENT_MASK = 0x1;
258 private static final int TFO_ENABLED_SERVER_MASK = 0x2;
259 private static final int TCP_FASTOPEN_MODE = NativeStaticallyReferencedJniMethods.tcpFastopenMode();
260
261
262
263
264 static final boolean IS_SUPPORTING_TCP_FASTOPEN_CLIENT =
265 (TCP_FASTOPEN_MODE & TFO_ENABLED_CLIENT_MASK) == TFO_ENABLED_CLIENT_MASK;
266
267
268
269
270 static final boolean IS_SUPPORTING_TCP_FASTOPEN_SERVER =
271 (TCP_FASTOPEN_MODE & TFO_ENABLED_SERVER_MASK) == TFO_ENABLED_SERVER_MASK;
272
273 private static final int[] REQUIRED_IORING_OPS = {
274 IORING_OP_POLL_ADD,
275 IORING_OP_TIMEOUT,
276 IORING_OP_ACCEPT,
277 IORING_OP_READ,
278 IORING_OP_WRITE,
279 IORING_OP_POLL_REMOVE,
280 IORING_OP_CONNECT,
281 IORING_OP_CLOSE,
282 IORING_OP_WRITEV,
283 IORING_OP_SENDMSG,
284 IORING_OP_RECVMSG,
285 IORING_OP_ASYNC_CANCEL,
286 IORING_OP_RECV,
287 IORING_OP_NOP,
288 IORING_OP_SHUTDOWN,
289 IORING_OP_SEND
290 };
291
292 static RingBuffer createRingBuffer() {
293 return createRingBuffer(DEFAULT_RING_SIZE);
294 }
295
296 static RingBuffer createRingBuffer(int ringSize) {
297 ObjectUtil.checkPositive(ringSize, "ringSize");
298 long[] values = ioUringSetup(ringSize);
299 assert values.length == 21;
300 CompletionQueue completionQueue = new CompletionQueue(
301 values[0],
302 values[1],
303 values[2],
304 values[3],
305 values[4],
306 values[5],
307 (int) values[6],
308 values[7],
309 (int) values[8]);
310 SubmissionQueue submissionQueue = new SubmissionQueue(
311 values[9],
312 values[10],
313 values[11],
314 values[12],
315 values[13],
316 values[14],
317 values[15],
318 values[16],
319 (int) values[17],
320 values[18],
321 (int) values[19]);
322 return new RingBuffer(submissionQueue, completionQueue, (int) values[20]);
323 }
324
325 static void checkAllIOSupported(int ringFd) {
326 if (!ioUringProbe(ringFd, REQUIRED_IORING_OPS)) {
327 throw new UnsupportedOperationException("Not all operations are supported: "
328 + Arrays.toString(REQUIRED_IORING_OPS));
329 }
330 }
331
332 static boolean isIOUringCqeFSockNonEmptySupported(int ringFd) {
333
334 return ioUringProbe(ringFd, new int[] { Native.IORING_OP_SOCKET });
335 }
336
337 static boolean isIOUringSupportSplice(int ringFd) {
338
339 return ioUringProbe(ringFd, new int[] { Native.IORING_OP_SPLICE });
340 }
341
342
343
344
345
346
347 static boolean isRegisterIOWQWorkerSupported(int ringFd) {
348
349 int result = ioUringRegisterIoWqMaxWorkers(ringFd, 0, 0);
350 if (result >= 0) {
351 return true;
352 }
353
354 return false;
355 }
356
357 static void checkKernelVersion(String kernelVersion) {
358 boolean enforceKernelVersion = SystemPropertyUtil.getBoolean(
359 "io.netty.transport.iouring.enforceKernelVersion", true);
360 boolean kernelSupported = checkKernelVersion(kernelVersion, 5, 9);
361 if (!kernelSupported) {
362 if (enforceKernelVersion) {
363 throw new UnsupportedOperationException(
364 "you need at least kernel version 5.9, current kernel version: " + kernelVersion);
365 } else {
366 logger.debug("Detected kernel " + kernelVersion + " does not match minimum version of 5.9, " +
367 "trying to use io_uring anyway");
368 }
369 }
370 }
371
372 private static boolean checkKernelVersion(String kernelVersion, int major, int minor) {
373 String[] versionComponents = kernelVersion.split("\\.");
374 if (versionComponents.length < 3) {
375 return false;
376 }
377 int nativeMajor;
378 try {
379 nativeMajor = Integer.parseInt(versionComponents[0]);
380 } catch (NumberFormatException e) {
381 return false;
382 }
383
384 if (nativeMajor < major) {
385 return false;
386 }
387
388 if (nativeMajor > major) {
389 return true;
390 }
391
392 int nativeMinor;
393 try {
394 nativeMinor = Integer.parseInt(versionComponents[1]);
395 } catch (NumberFormatException e) {
396 return false;
397 }
398
399 return nativeMinor >= minor;
400 }
401
402 private static native boolean ioUringProbe(int ringFd, int[] ios);
403 private static native long[] ioUringSetup(int entries);
404
405 static native int ioUringRegisterIoWqMaxWorkers(int ringFd, int maxBoundedValue, int maxUnboundedValue);
406
407 static native int ioUringEnter(int ringFd, int toSubmit, int minComplete, int flags);
408
409 static native void eventFdWrite(int fd, long value);
410
411 static int getFd(DefaultFileRegion fileChannel) {
412 return getFd0(fileChannel);
413 }
414
415 private static native int getFd0(Object fileChannel);
416
417 static FileDescriptor newBlockingEventFd() {
418 return new FileDescriptor(blockingEventFd());
419 }
420
421 static native void ioUringExit(long submissionQueueArrayAddress, int submissionQueueRingEntries,
422 long submissionQueueRingAddress, int submissionQueueRingSize,
423 long completionQueueRingAddress, int completionQueueRingSize,
424 int ringFd);
425
426 private static native int blockingEventFd();
427
428
429 static native int createFile(String name);
430
431 private static native int registerUnix();
432
433 static native long cmsghdrData(long hdrAddr);
434
435 static native String kernelVersion();
436
437 private Native() {
438
439 }
440
441
442 private static void loadNativeLibrary() {
443 String name = PlatformDependent.normalizedOs().toLowerCase(Locale.ROOT).trim();
444 if (!name.startsWith("linux")) {
445 throw new IllegalStateException("Only supported on Linux");
446 }
447 String staticLibName = "netty_transport_native_io_uring";
448 String sharedLibName = staticLibName + '_' + PlatformDependent.normalizedArch();
449 ClassLoader cl = PlatformDependent.getClassLoader(Native.class);
450 try {
451 NativeLibraryLoader.load(sharedLibName, cl);
452 } catch (UnsatisfiedLinkError e1) {
453 try {
454 NativeLibraryLoader.load(staticLibName, cl);
455 logger.info("Failed to load io_uring");
456 } catch (UnsatisfiedLinkError e2) {
457 ThrowableUtil.addSuppressed(e1, e2);
458 throw e1;
459 }
460 }
461 }
462 }