1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package io.netty.channel.uring;
17
18 import io.netty.channel.DefaultFileRegion;
19 import io.netty.util.internal.ObjectUtil;
20 import io.netty.util.internal.logging.InternalLogger;
21 import io.netty.util.internal.logging.InternalLoggerFactory;
22 import io.netty.channel.unix.FileDescriptor;
23 import io.netty.channel.unix.PeerCredentials;
24 import io.netty.channel.unix.Unix;
25 import io.netty.util.internal.ClassInitializerUtil;
26 import io.netty.util.internal.NativeLibraryLoader;
27 import io.netty.util.internal.PlatformDependent;
28 import io.netty.util.internal.SystemPropertyUtil;
29 import io.netty.util.internal.ThrowableUtil;
30
31 import java.io.File;
32 import java.io.IOException;
33 import java.nio.channels.FileChannel;
34 import java.nio.channels.Selector;
35 import java.nio.file.Path;
36 import java.util.Arrays;
37 import java.util.Locale;
38
39 final class Native {
// Logger used for the diagnostics emitted by this class.
private static final InternalLogger logger = InternalLoggerFactory.getInstance(Native.class);
// Ring size used by the no-argument createRingBuffer(): the value of the "io.netty.iouring.ringSize"
// system property (4096 is passed as the fallback), raised to at least 64.
static final int DEFAULT_RING_SIZE = Math.max(64, SystemPropertyUtil.getInt("io.netty.iouring.ringSize", 4096));
42
// Class initialization: ensures the native library is linked (loading it if necessary) and then
// registers this class with Unix. Static members declared after this block are initialized only
// once it has completed.
static {
    Selector selector = null;
    try {
        // NOTE(review): the selector is never used for I/O; it is only opened here and closed in the
        // finally block below. Presumably opening it forces JDK NIO internals to initialize before the
        // native library is loaded (class-loading deadlock workaround) - confirm against project history.
        selector = Selector.open();
    } catch (IOException ignore) {
        // Best effort: a failure to open the selector must not abort class initialization.
    }

    // Eagerly load the listed classes.
    // NOTE(review): presumably these are the classes the native code looks up while it is being
    // loaded, so the list has to stay in sync with the JNI sources - confirm.
    ClassInitializerUtil.tryLoadClasses(
            Native.class,
            PeerCredentials.class, java.io.FileDescriptor.class
    );

    File tmpDir = PlatformDependent.tmpdir();
    Path tmpFile = tmpDir.toPath().resolve("netty_io_uring.tmp");
    try {
        // Probe with a JNI call: it only succeeds when the native method is already linked,
        // otherwise the JVM raises UnsatisfiedLinkError.
        Native.createFile(tmpFile.toString());
    } catch (UnsatisfiedLinkError ignore) {
        // Not linked yet, so load the native library now.
        loadNativeLibrary();
    } finally {
        // Clean up whatever the probe call may have left behind; the result of delete() is
        // deliberately not checked.
        tmpFile.toFile().delete();
        try {
            if (selector != null) {
                selector.close();
            }
        } catch (IOException ignore) {
            // Nothing useful can be done if closing the selector fails.
        }
    }
    Unix.registerInternal(Native::registerUnix);
}
87
// Platform constants, each read once from NativeStaticallyReferencedJniMethods during class
// initialization. They are declared after the static initializer, so that block has already run
// when they are evaluated.
// NOTE(review): the names mirror C socket definitions (flags, address families, struct sizes and
// field offsets); presumably the values are those of the platform the native library was built
// for - confirm against the JNI sources.

// Socket creation flags and address families (the families are narrowed to short).
static final int SOCK_NONBLOCK = NativeStaticallyReferencedJniMethods.sockNonblock();
static final int SOCK_CLOEXEC = NativeStaticallyReferencedJniMethods.sockCloexec();
static final short AF_INET = (short) NativeStaticallyReferencedJniMethods.afInet();
static final short AF_INET6 = (short) NativeStaticallyReferencedJniMethods.afInet6();

// Sizes and field offsets of the socket address structures.
static final int SIZEOF_SOCKADDR_STORAGE = NativeStaticallyReferencedJniMethods.sizeofSockaddrStorage();
static final int SIZEOF_SOCKADDR_IN = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn();
static final int SIZEOF_SOCKADDR_IN6 = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn6();
static final int SOCKADDR_IN_OFFSETOF_SIN_FAMILY =
        NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinFamily();
static final int SOCKADDR_IN_OFFSETOF_SIN_PORT = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinPort();
static final int SOCKADDR_IN_OFFSETOF_SIN_ADDR = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinAddr();
static final int IN_ADDRESS_OFFSETOF_S_ADDR = NativeStaticallyReferencedJniMethods.inAddressOffsetofSAddr();
static final int SOCKADDR_IN6_OFFSETOF_SIN6_FAMILY =
        NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Family();
static final int SOCKADDR_IN6_OFFSETOF_SIN6_PORT =
        NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Port();
static final int SOCKADDR_IN6_OFFSETOF_SIN6_FLOWINFO =
        NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Flowinfo();
static final int SOCKADDR_IN6_OFFSETOF_SIN6_ADDR =
        NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Addr();
static final int SOCKADDR_IN6_OFFSETOF_SIN6_SCOPE_ID =
        NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6ScopeId();
static final int IN6_ADDRESS_OFFSETOF_S6_ADDR = NativeStaticallyReferencedJniMethods.in6AddressOffsetofS6Addr();

// Sizes and offsets related to size_t, iovec and control-message (cmsg) headers.
static final int SIZEOF_SIZE_T = NativeStaticallyReferencedJniMethods.sizeofSizeT();
static final int SIZEOF_IOVEC = NativeStaticallyReferencedJniMethods.sizeofIovec();
static final int CMSG_SPACE = NativeStaticallyReferencedJniMethods.cmsgSpace();
static final int CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsgLen();
static final int CMSG_OFFSETOF_CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLen();
static final int CMSG_OFFSETOF_CMSG_LEVEL = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLevel();
static final int CMSG_OFFSETOF_CMSG_TYPE = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgType();

// Field offsets of iovec and size/field offsets of msghdr.
static final int IOVEC_OFFSETOF_IOV_BASE = NativeStaticallyReferencedJniMethods.iovecOffsetofIovBase();
static final int IOVEC_OFFSETOF_IOV_LEN = NativeStaticallyReferencedJniMethods.iovecOffsetofIovLen();
static final int SIZEOF_MSGHDR = NativeStaticallyReferencedJniMethods.sizeofMsghdr();
static final int MSGHDR_OFFSETOF_MSG_NAME = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgName();
static final int MSGHDR_OFFSETOF_MSG_NAMELEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgNamelen();
static final int MSGHDR_OFFSETOF_MSG_IOV = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIov();
static final int MSGHDR_OFFSETOF_MSG_IOVLEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIovlen();
static final int MSGHDR_OFFSETOF_MSG_CONTROL = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControl();
static final int MSGHDR_OFFSETOF_MSG_CONTROLLEN =
        NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControllen();
static final int MSGHDR_OFFSETOF_MSG_FLAGS = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgFlags();

// Poll event masks.
static final int POLLIN = NativeStaticallyReferencedJniMethods.pollin();
static final int POLLOUT = NativeStaticallyReferencedJniMethods.pollout();
static final int POLLRDHUP = NativeStaticallyReferencedJniMethods.pollrdhup();

// Error numbers stored already negated (note the unary minus).
// NOTE(review): presumably so they can be compared directly with negative completion results - confirm.
static final int ERRNO_ECANCELED_NEGATIVE = -NativeStaticallyReferencedJniMethods.ecanceled();
static final int ERRNO_ETIME_NEGATIVE = -NativeStaticallyReferencedJniMethods.etime();
135
136
137
138
139 static final byte IORING_OP_NOP = 0;
140 static final byte IORING_OP_READV = 1;
141 static final byte IORING_OP_WRITEV = 2;
142 static final byte IORING_OP_FSYNC = 3;
143 static final byte IORING_OP_READ_FIXED = 4;
144 static final byte IORING_OP_WRITE_FIXED = 5;
145 static final byte IORING_OP_POLL_ADD = 6;
146 static final byte IORING_OP_POLL_REMOVE = 7;
147 static final byte IORING_OP_SYNC_FILE_RANGE = 8;
148 static final byte IORING_OP_SENDMSG = 9;
149 static final byte IORING_OP_RECVMSG = 10;
150 static final byte IORING_OP_TIMEOUT = 11;
151 static final byte IORING_OP_TIMEOUT_REMOVE = 12;
152 static final byte IORING_OP_ACCEPT = 13;
153 static final byte IORING_OP_ASYNC_CANCEL = 14;
154 static final byte IORING_OP_LINK_TIMEOUT = 15;
155 static final byte IORING_OP_CONNECT = 16;
156 static final byte IORING_OP_FALLOCATE = 17;
157 static final byte IORING_OP_OPENAT = 18;
158 static final byte IORING_OP_CLOSE = 19;
159 static final byte IORING_OP_FILES_UPDATE = 20;
160 static final byte IORING_OP_STATX = 21;
161 static final byte IORING_OP_READ = 22;
162 static final byte IORING_OP_WRITE = 23;
163 static final byte IORING_OP_FADVISE = 24;
164 static final byte IORING_OP_MADVISE = 25;
165 static final byte IORING_OP_SEND = 26;
166 static final byte IORING_OP_RECV = 27;
167 static final byte IORING_OP_OPENAT2 = 28;
168 static final byte IORING_OP_EPOLL_CTL = 29;
169 static final byte IORING_OP_SPLICE = 30;
170 static final byte IORING_OP_PROVIDE_BUFFERS = 31;
171 static final byte IORING_OP_REMOVE_BUFFERS = 32;
172 static final byte IORING_OP_TEE = 33;
173 static final byte IORING_OP_SHUTDOWN = 34;
174 static final byte IORING_OP_RENAMEAT = 35;
175 static final byte IORING_OP_UNLINKAT = 36;
176 static final byte IORING_OP_MKDIRAT = 37;
177 static final byte IORING_OP_SYMLINKAT = 38;
178 static final byte IORING_OP_LINKAT = 39;
179 static final byte IORING_OP_MSG_RING = 40;
180 static final byte IORING_OP_FSETXATTR = 41;
181 static final byte IORING_OP_SETXATTR = 42;
182 static final byte IORING_OP_FGETXATTR = 43;
183 static final byte IORING_OP_GETXATTR = 44;
184 static final byte IORING_OP_SOCKET = 45;
185 static final byte IORING_OP_URING_CMD = 46;
186 static final byte IORING_OP_SEND_ZC = 47;
187 static final byte IORING_OP_SENDMSG_ZC = 48;
188 static final byte IORING_OP_READ_MULTISHOT = 49;
189 static final byte IORING_OP_WAITID = 50;
190 static final byte IORING_OP_FUTEX_WAIT = 51;
191 static final byte IORING_OP_FUTEX_WAKE = 52;
192 static final byte IORING_OP_FUTEX_WAITV = 53;
193 static final byte IORING_OP_FIXED_FD_INSTALL = 54;
194 static final byte IORING_OP_FTRUNCATE = 55;
195 static final byte IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
196
197 static final int SPLICE_F_MOVE = 1;
198
199 static String opToStr(byte op) {
200 switch (op) {
201 case IORING_OP_NOP: return "NOP";
202 case IORING_OP_READV: return "READV";
203 case IORING_OP_WRITEV: return "WRITEV";
204 case IORING_OP_FSYNC: return "FSYNC";
205 case IORING_OP_READ_FIXED: return "READ_FIXED";
206 case IORING_OP_WRITE_FIXED: return "WRITE_FIXED";
207 case IORING_OP_POLL_ADD: return "POLL_ADD";
208 case IORING_OP_POLL_REMOVE: return "POLL_REMOVE";
209 case IORING_OP_SYNC_FILE_RANGE: return "SYNC_FILE_RANGE";
210 case IORING_OP_SENDMSG: return "SENDMSG";
211 case IORING_OP_RECVMSG: return "RECVMSG";
212 case IORING_OP_TIMEOUT: return "TIMEOUT";
213 case IORING_OP_TIMEOUT_REMOVE: return "TIMEOUT_REMOVE";
214 case IORING_OP_ACCEPT: return "ACCEPT";
215 case IORING_OP_ASYNC_CANCEL: return "ASYNC_CANCEL";
216 case IORING_OP_LINK_TIMEOUT: return "LINK_TIMEOUT";
217 case IORING_OP_CONNECT: return "CONNECT";
218 case IORING_OP_FALLOCATE: return "FALLOCATE";
219 case IORING_OP_OPENAT: return "OPENAT";
220 case IORING_OP_CLOSE: return "CLOSE";
221 case IORING_OP_FILES_UPDATE: return "FILES_UPDATE";
222 case IORING_OP_STATX: return "STATX";
223 case IORING_OP_READ: return "READ";
224 case IORING_OP_WRITE: return "WRITE";
225 case IORING_OP_FADVISE: return "FADVISE";
226 case IORING_OP_MADVISE: return "MADVISE";
227 case IORING_OP_SEND: return "SEND";
228 case IORING_OP_RECV: return "RECV";
229 case IORING_OP_OPENAT2: return "OPENAT2";
230 case IORING_OP_EPOLL_CTL: return "EPOLL_CTL";
231 case IORING_OP_SPLICE: return "SPLICE";
232 case IORING_OP_PROVIDE_BUFFERS: return "PROVIDE_BUFFERS";
233 case IORING_OP_REMOVE_BUFFERS: return "REMOVE_BUFFERS";
234 case IORING_OP_TEE: return "TEE";
235 case IORING_OP_SHUTDOWN: return "SHUTDOWN";
236 case IORING_OP_RENAMEAT: return "RENAMEAT";
237 case IORING_OP_UNLINKAT: return "UNLINKAT";
238 case IORING_OP_MKDIRAT: return "MKDIRAT";
239 case IORING_OP_SYMLINKAT: return "SYMLINKAT";
240 case IORING_OP_LINKAT: return "LINKAT";
241 default: return "[OP CODE " + op + ']';
242 }
243 }
244
// io_uring enter/submission flags and socket-level constants, each read once from
// NativeStaticallyReferencedJniMethods during class initialization.
static final int IORING_ENTER_GETEVENTS = NativeStaticallyReferencedJniMethods.ioringEnterGetevents();
static final int IOSQE_ASYNC = NativeStaticallyReferencedJniMethods.iosqeAsync();
static final int IOSQE_LINK = NativeStaticallyReferencedJniMethods.iosqeLink();
static final int IOSQE_IO_DRAIN = NativeStaticallyReferencedJniMethods.iosqeDrain();
static final int MSG_DONTWAIT = NativeStaticallyReferencedJniMethods.msgDontwait();
static final int MSG_FASTOPEN = NativeStaticallyReferencedJniMethods.msgFastopen();
static final int SOL_UDP = NativeStaticallyReferencedJniMethods.solUdp();
static final int UDP_SEGMENT = NativeStaticallyReferencedJniMethods.udpSegment();

// Bit masks applied to TCP_FASTOPEN_MODE below: bit 0 is tested for the client flag, bit 1 for the
// server flag.
// NOTE(review): presumably this follows the bit layout of the Linux net.ipv4.tcp_fastopen
// setting - confirm against the native implementation of tcpFastopenMode().
private static final int TFO_ENABLED_CLIENT_MASK = 0x1;
private static final int TFO_ENABLED_SERVER_MASK = 0x2;
private static final int TCP_FASTOPEN_MODE = NativeStaticallyReferencedJniMethods.tcpFastopenMode();

/**
 * {@code true} when the client bit ({@code 0x1}) is set in the TCP Fast Open mode reported by the
 * native layer.
 */
static final boolean IS_SUPPORTING_TCP_FASTOPEN_CLIENT =
        (TCP_FASTOPEN_MODE & TFO_ENABLED_CLIENT_MASK) == TFO_ENABLED_CLIENT_MASK;

/**
 * {@code true} when the server bit ({@code 0x2}) is set in the TCP Fast Open mode reported by the
 * native layer.
 */
static final boolean IS_SUPPORTING_TCP_FASTOPEN_SERVER =
        (TCP_FASTOPEN_MODE & TFO_ENABLED_SERVER_MASK) == TFO_ENABLED_SERVER_MASK;
268
// Operation codes handed to ioUringProbe(...) by checkAllIOSupported(...), which throws
// UnsupportedOperationException when the probe reports false for this set. The byte constants are
// widened to int because the probe takes an int[].
private static final int[] REQUIRED_IORING_OPS = {
        IORING_OP_POLL_ADD,
        IORING_OP_TIMEOUT,
        IORING_OP_ACCEPT,
        IORING_OP_READ,
        IORING_OP_WRITE,
        IORING_OP_POLL_REMOVE,
        IORING_OP_CONNECT,
        IORING_OP_CLOSE,
        IORING_OP_WRITEV,
        IORING_OP_SENDMSG,
        IORING_OP_RECVMSG,
        IORING_OP_ASYNC_CANCEL,
        IORING_OP_RECV,
        IORING_OP_NOP,
        IORING_OP_SHUTDOWN,
        IORING_OP_SEND
};
287
288 static RingBuffer createRingBuffer() {
289 return createRingBuffer(DEFAULT_RING_SIZE);
290 }
291
292 static RingBuffer createRingBuffer(int ringSize) {
293 ObjectUtil.checkPositive(ringSize, "ringSize");
294 long[][] values = ioUringSetup(ringSize);
295 assert values.length == 2;
296 long[] completionQueueArgs = values[1];
297 assert completionQueueArgs.length == 9;
298 CompletionQueue completionQueue = new CompletionQueue(
299 completionQueueArgs[0],
300 completionQueueArgs[1],
301 completionQueueArgs[2],
302 completionQueueArgs[3],
303 completionQueueArgs[4],
304 completionQueueArgs[5],
305 (int) completionQueueArgs[6],
306 completionQueueArgs[7],
307 (int) completionQueueArgs[8]);
308 long[] submissionQueueArgs = values[0];
309 assert submissionQueueArgs.length == 11;
310 SubmissionQueue submissionQueue = new SubmissionQueue(
311 submissionQueueArgs[0],
312 submissionQueueArgs[1],
313 submissionQueueArgs[2],
314 submissionQueueArgs[3],
315 submissionQueueArgs[4],
316 submissionQueueArgs[5],
317 submissionQueueArgs[6],
318 submissionQueueArgs[7],
319 (int) submissionQueueArgs[8],
320 submissionQueueArgs[9],
321 (int) submissionQueueArgs[10],
322 completionQueue);
323 return new RingBuffer(submissionQueue, completionQueue);
324 }
325
326 static void checkAllIOSupported(int ringFd) {
327 if (!ioUringProbe(ringFd, REQUIRED_IORING_OPS)) {
328 throw new UnsupportedOperationException("Not all operations are supported: "
329 + Arrays.toString(REQUIRED_IORING_OPS));
330 }
331 }
332
333 static boolean isIOUringCqeFSockNonEmptySupported(int ringFd) {
334
335 return ioUringProbe(ringFd, new int[] { Native.IORING_OP_SOCKET });
336 }
337
338 static boolean isIOUringSupportSplice(int ringFd) {
339
340 return ioUringProbe(ringFd, new int[] { Native.IORING_OP_SPLICE });
341 }
342
343 static void checkKernelVersion(String kernelVersion) {
344 boolean enforceKernelVersion = SystemPropertyUtil.getBoolean(
345 "io.netty5.transport.iouring.enforceKernelVersion", true);
346 boolean kernelSupported = checkKernelVersion0(kernelVersion);
347 if (!kernelSupported) {
348 if (enforceKernelVersion) {
349 throw new UnsupportedOperationException(
350 "you need at least kernel version 5.9, current kernel version: " + kernelVersion);
351 } else {
352 logger.debug("Detected kernel " + kernelVersion + " does not match minimum version of 5.9, " +
353 "trying to use io_uring anyway");
354 }
355 }
356 }
357
358 private static boolean checkKernelVersion0(String kernelVersion) {
359 String[] versionComponents = kernelVersion.split("\\.");
360 if (versionComponents.length < 3) {
361 return false;
362 }
363
364 int major;
365 try {
366 major = Integer.parseInt(versionComponents[0]);
367 } catch (NumberFormatException e) {
368 return false;
369 }
370
371 if (major <= 4) {
372 return false;
373 }
374 if (major > 5) {
375 return true;
376 }
377
378 int minor;
379 try {
380 minor = Integer.parseInt(versionComponents[1]);
381 } catch (NumberFormatException e) {
382 return false;
383 }
384
385 return minor >= 9;
386 }
387
// Native probe for a set of operation codes on the given ring. Callers in this class treat a
// true result as "every code in ios is supported".
private static native boolean ioUringProbe(int ringFd, int[] ios);

// Native ring setup. createRingBuffer(...) expects two rows back: [0] with 11 submission-queue
// values and [1] with 9 completion-queue values.
private static native long[][] ioUringSetup(int entries);

// NOTE(review): presumably a thin wrapper around the io_uring_enter system call - confirm against
// the JNI sources, including how errors are reported through the int result.
static native int ioUringEnter(int ringFd, int toSubmit, int minComplete, int flags);

// NOTE(review): presumably writes value to the eventfd identified by fd - confirm against the
// JNI sources.
static native void eventFdWrite(int fd, long value);
394
395 static int getFd(DefaultFileRegion fileChannel) {
396 return getFd0(fileChannel);
397 }
398
399 private static native int getFd0(Object fileChannel);
400
401 static FileDescriptor newBlockingEventFd() {
402 return new FileDescriptor(blockingEventFd());
403 }
404
// NOTE(review): presumably releases the mappings described by the address/size arguments and
// closes ringFd - confirm against the JNI sources.
static native void ioUringExit(long submissionQueueArrayAddress, int submissionQueueRingEntries,
                               long submissionQueueRingAddress, int submissionQueueRingSize,
                               long completionQueueRingAddress, int completionQueueRingSize,
                               int ringFd);

// Native source of the descriptor wrapped by newBlockingEventFd().
private static native int blockingEventFd();

// Also called from the static initializer as a probe to find out whether the native library is
// already linked.
static native int createFile(String name);

// Passed to Unix.registerInternal(...) as a method reference by the static initializer.
private static native int registerUnix();

// NOTE(review): presumably returns the address of the data area of the control-message header
// located at hdrAddr - confirm against the JNI sources.
static native long cmsghdrData(long hdrAddr);

// NOTE(review): presumably returns the running kernel's release string, in the dotted form that
// checkKernelVersion(...) parses - confirm.
static native String kernelVersion();
420
// Private constructor: the class only declares static members, so it is never instantiated.
private Native() {
    // utility
}
424
425
426 private static void loadNativeLibrary() {
427 String name = PlatformDependent.normalizedOs().toLowerCase(Locale.ROOT).trim();
428 if (!name.startsWith("linux")) {
429 throw new IllegalStateException("Only supported on Linux");
430 }
431 String staticLibName = "netty_transport_native_io_uring";
432 String sharedLibName = staticLibName + '_' + PlatformDependent.normalizedArch();
433 ClassLoader cl = PlatformDependent.getClassLoader(Native.class);
434 try {
435 NativeLibraryLoader.load(sharedLibName, cl);
436 } catch (UnsatisfiedLinkError e1) {
437 try {
438 NativeLibraryLoader.load(staticLibName, cl);
439 logger.info("Failed to load io_uring");
440 } catch (UnsatisfiedLinkError e2) {
441 ThrowableUtil.addSuppressed(e1, e2);
442 throw e1;
443 }
444 }
445 }
446 }