1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package io.netty.channel.uring;
17
18 import io.netty.util.internal.ObjectUtil;
19 import io.netty.util.internal.logging.InternalLogger;
20 import io.netty.util.internal.logging.InternalLoggerFactory;
21 import io.netty.channel.unix.FileDescriptor;
22 import io.netty.channel.unix.PeerCredentials;
23 import io.netty.channel.unix.Unix;
24 import io.netty.util.internal.ClassInitializerUtil;
25 import io.netty.util.internal.NativeLibraryLoader;
26 import io.netty.util.internal.PlatformDependent;
27 import io.netty.util.internal.SystemPropertyUtil;
28 import io.netty.util.internal.ThrowableUtil;
29
30 import java.io.File;
31 import java.io.IOException;
32 import java.nio.channels.Selector;
33 import java.nio.file.Path;
34 import java.util.Arrays;
35 import java.util.Locale;
36
37 final class Native {
38 private static final InternalLogger logger = InternalLoggerFactory.getInstance(Native.class);
39 static final int DEFAULT_RING_SIZE = Math.max(64, SystemPropertyUtil.getInt("io.netty.iouring.ringSize", 4096));
40
41 static {
42 Selector selector = null;
43 try {
44
45
46
47
48 selector = Selector.open();
49 } catch (IOException ignore) {
50
51 }
52
53
54
55
56
57
58 ClassInitializerUtil.tryLoadClasses(
59 Native.class,
60
61 PeerCredentials.class, java.io.FileDescriptor.class
62 );
63
64 File tmpDir = PlatformDependent.tmpdir();
65 Path tmpFile = tmpDir.toPath().resolve("netty_io_uring.tmp");
66 try {
67
68
69 Native.createFile(tmpFile.toString());
70 } catch (UnsatisfiedLinkError ignore) {
71
72 loadNativeLibrary();
73 } finally {
74 tmpFile.toFile().delete();
75 try {
76 if (selector != null) {
77 selector.close();
78 }
79 } catch (IOException ignore) {
80
81 }
82 }
83 Unix.registerInternal(Native::registerUnix);
84 }
85
86 static final int SOCK_NONBLOCK = NativeStaticallyReferencedJniMethods.sockNonblock();
87 static final int SOCK_CLOEXEC = NativeStaticallyReferencedJniMethods.sockCloexec();
88 static final short AF_INET = (short) NativeStaticallyReferencedJniMethods.afInet();
89 static final short AF_INET6 = (short) NativeStaticallyReferencedJniMethods.afInet6();
90 static final int SIZEOF_SOCKADDR_STORAGE = NativeStaticallyReferencedJniMethods.sizeofSockaddrStorage();
91 static final int SIZEOF_SOCKADDR_IN = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn();
92 static final int SIZEOF_SOCKADDR_IN6 = NativeStaticallyReferencedJniMethods.sizeofSockaddrIn6();
93 static final int SOCKADDR_IN_OFFSETOF_SIN_FAMILY =
94 NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinFamily();
95 static final int SOCKADDR_IN_OFFSETOF_SIN_PORT = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinPort();
96 static final int SOCKADDR_IN_OFFSETOF_SIN_ADDR = NativeStaticallyReferencedJniMethods.sockaddrInOffsetofSinAddr();
97 static final int IN_ADDRESS_OFFSETOF_S_ADDR = NativeStaticallyReferencedJniMethods.inAddressOffsetofSAddr();
98 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FAMILY =
99 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Family();
100 static final int SOCKADDR_IN6_OFFSETOF_SIN6_PORT =
101 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Port();
102 static final int SOCKADDR_IN6_OFFSETOF_SIN6_FLOWINFO =
103 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Flowinfo();
104 static final int SOCKADDR_IN6_OFFSETOF_SIN6_ADDR =
105 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6Addr();
106 static final int SOCKADDR_IN6_OFFSETOF_SIN6_SCOPE_ID =
107 NativeStaticallyReferencedJniMethods.sockaddrIn6OffsetofSin6ScopeId();
108 static final int IN6_ADDRESS_OFFSETOF_S6_ADDR = NativeStaticallyReferencedJniMethods.in6AddressOffsetofS6Addr();
109 static final int SIZEOF_SIZE_T = NativeStaticallyReferencedJniMethods.sizeofSizeT();
110 static final int SIZEOF_IOVEC = NativeStaticallyReferencedJniMethods.sizeofIovec();
111 static final int CMSG_SPACE = NativeStaticallyReferencedJniMethods.cmsgSpace();
112 static final int CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsgLen();
113 static final int CMSG_OFFSETOF_CMSG_LEN = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLen();
114 static final int CMSG_OFFSETOF_CMSG_LEVEL = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgLevel();
115 static final int CMSG_OFFSETOF_CMSG_TYPE = NativeStaticallyReferencedJniMethods.cmsghdrOffsetofCmsgType();
116
117 static final int IOVEC_OFFSETOF_IOV_BASE = NativeStaticallyReferencedJniMethods.iovecOffsetofIovBase();
118 static final int IOVEC_OFFSETOF_IOV_LEN = NativeStaticallyReferencedJniMethods.iovecOffsetofIovLen();
119 static final int SIZEOF_MSGHDR = NativeStaticallyReferencedJniMethods.sizeofMsghdr();
120 static final int MSGHDR_OFFSETOF_MSG_NAME = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgName();
121 static final int MSGHDR_OFFSETOF_MSG_NAMELEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgNamelen();
122 static final int MSGHDR_OFFSETOF_MSG_IOV = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIov();
123 static final int MSGHDR_OFFSETOF_MSG_IOVLEN = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgIovlen();
124 static final int MSGHDR_OFFSETOF_MSG_CONTROL = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControl();
125 static final int MSGHDR_OFFSETOF_MSG_CONTROLLEN =
126 NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgControllen();
127 static final int MSGHDR_OFFSETOF_MSG_FLAGS = NativeStaticallyReferencedJniMethods.msghdrOffsetofMsgFlags();
128 static final int POLLIN = NativeStaticallyReferencedJniMethods.pollin();
129 static final int POLLOUT = NativeStaticallyReferencedJniMethods.pollout();
130 static final int POLLRDHUP = NativeStaticallyReferencedJniMethods.pollrdhup();
131 static final int ERRNO_ECANCELED_NEGATIVE = -NativeStaticallyReferencedJniMethods.ecanceled();
132 static final int ERRNO_ETIME_NEGATIVE = -NativeStaticallyReferencedJniMethods.etime();
133
134
135
136
137 static final byte IORING_OP_NOP = 0;
138 static final byte IORING_OP_READV = 1;
139 static final byte IORING_OP_WRITEV = 2;
140 static final byte IORING_OP_FSYNC = 3;
141 static final byte IORING_OP_READ_FIXED = 4;
142 static final byte IORING_OP_WRITE_FIXED = 5;
143 static final byte IORING_OP_POLL_ADD = 6;
144 static final byte IORING_OP_POLL_REMOVE = 7;
145 static final byte IORING_OP_SYNC_FILE_RANGE = 8;
146 static final byte IORING_OP_SENDMSG = 9;
147 static final byte IORING_OP_RECVMSG = 10;
148 static final byte IORING_OP_TIMEOUT = 11;
149 static final byte IORING_OP_TIMEOUT_REMOVE = 12;
150 static final byte IORING_OP_ACCEPT = 13;
151 static final byte IORING_OP_ASYNC_CANCEL = 14;
152 static final byte IORING_OP_LINK_TIMEOUT = 15;
153 static final byte IORING_OP_CONNECT = 16;
154 static final byte IORING_OP_FALLOCATE = 17;
155 static final byte IORING_OP_OPENAT = 18;
156 static final byte IORING_OP_CLOSE = 19;
157 static final byte IORING_OP_FILES_UPDATE = 20;
158 static final byte IORING_OP_STATX = 21;
159 static final byte IORING_OP_READ = 22;
160 static final byte IORING_OP_WRITE = 23;
161 static final byte IORING_OP_FADVISE = 24;
162 static final byte IORING_OP_MADVISE = 25;
163 static final byte IORING_OP_SEND = 26;
164 static final byte IORING_OP_RECV = 27;
165 static final byte IORING_OP_OPENAT2 = 28;
166 static final byte IORING_OP_EPOLL_CTL = 29;
167 static final byte IORING_OP_SPLICE = 30;
168 static final byte IORING_OP_PROVIDE_BUFFERS = 31;
169 static final byte IORING_OP_REMOVE_BUFFERS = 32;
170 static final byte IORING_OP_TEE = 33;
171 static final byte IORING_OP_SHUTDOWN = 34;
172 static final byte IORING_OP_RENAMEAT = 35;
173 static final byte IORING_OP_UNLINKAT = 36;
174 static final byte IORING_OP_MKDIRAT = 37;
175 static final byte IORING_OP_SYMLINKAT = 38;
176 static final byte IORING_OP_LINKAT = 39;
177 static final byte IORING_OP_MSG_RING = 40;
178 static final byte IORING_OP_FSETXATTR = 41;
179 static final byte IORING_OP_SETXATTR = 42;
180 static final byte IORING_OP_FGETXATTR = 43;
181 static final byte IORING_OP_GETXATTR = 44;
182 static final byte IORING_OP_SOCKET = 45;
183 static final byte IORING_OP_URING_CMD = 46;
184 static final byte IORING_OP_SEND_ZC = 47;
185 static final byte IORING_OP_SENDMSG_ZC = 48;
186 static final byte IORING_OP_READ_MULTISHOT = 49;
187 static final byte IORING_OP_WAITID = 50;
188 static final byte IORING_OP_FUTEX_WAIT = 51;
189 static final byte IORING_OP_FUTEX_WAKE = 52;
190 static final byte IORING_OP_FUTEX_WAITV = 53;
191 static final byte IORING_OP_FIXED_FD_INSTALL = 54;
192 static final byte IORING_OP_FTRUNCATE = 55;
193 static final byte IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
194
195 static String opToStr(byte op) {
196 switch (op) {
197 case IORING_OP_NOP: return "NOP";
198 case IORING_OP_READV: return "READV";
199 case IORING_OP_WRITEV: return "WRITEV";
200 case IORING_OP_FSYNC: return "FSYNC";
201 case IORING_OP_READ_FIXED: return "READ_FIXED";
202 case IORING_OP_WRITE_FIXED: return "WRITE_FIXED";
203 case IORING_OP_POLL_ADD: return "POLL_ADD";
204 case IORING_OP_POLL_REMOVE: return "POLL_REMOVE";
205 case IORING_OP_SYNC_FILE_RANGE: return "SYNC_FILE_RANGE";
206 case IORING_OP_SENDMSG: return "SENDMSG";
207 case IORING_OP_RECVMSG: return "RECVMSG";
208 case IORING_OP_TIMEOUT: return "TIMEOUT";
209 case IORING_OP_TIMEOUT_REMOVE: return "TIMEOUT_REMOVE";
210 case IORING_OP_ACCEPT: return "ACCEPT";
211 case IORING_OP_ASYNC_CANCEL: return "ASYNC_CANCEL";
212 case IORING_OP_LINK_TIMEOUT: return "LINK_TIMEOUT";
213 case IORING_OP_CONNECT: return "CONNECT";
214 case IORING_OP_FALLOCATE: return "FALLOCATE";
215 case IORING_OP_OPENAT: return "OPENAT";
216 case IORING_OP_CLOSE: return "CLOSE";
217 case IORING_OP_FILES_UPDATE: return "FILES_UPDATE";
218 case IORING_OP_STATX: return "STATX";
219 case IORING_OP_READ: return "READ";
220 case IORING_OP_WRITE: return "WRITE";
221 case IORING_OP_FADVISE: return "FADVISE";
222 case IORING_OP_MADVISE: return "MADVISE";
223 case IORING_OP_SEND: return "SEND";
224 case IORING_OP_RECV: return "RECV";
225 case IORING_OP_OPENAT2: return "OPENAT2";
226 case IORING_OP_EPOLL_CTL: return "EPOLL_CTL";
227 case IORING_OP_SPLICE: return "SPLICE";
228 case IORING_OP_PROVIDE_BUFFERS: return "PROVIDE_BUFFERS";
229 case IORING_OP_REMOVE_BUFFERS: return "REMOVE_BUFFERS";
230 case IORING_OP_TEE: return "TEE";
231 case IORING_OP_SHUTDOWN: return "SHUTDOWN";
232 case IORING_OP_RENAMEAT: return "RENAMEAT";
233 case IORING_OP_UNLINKAT: return "UNLINKAT";
234 case IORING_OP_MKDIRAT: return "MKDIRAT";
235 case IORING_OP_SYMLINKAT: return "SYMLINKAT";
236 case IORING_OP_LINKAT: return "LINKAT";
237 default: return "[OP CODE " + op + ']';
238 }
239 }
240
241 static final int IORING_ENTER_GETEVENTS = NativeStaticallyReferencedJniMethods.ioringEnterGetevents();
242 static final int IOSQE_ASYNC = NativeStaticallyReferencedJniMethods.iosqeAsync();
243 static final int IOSQE_LINK = NativeStaticallyReferencedJniMethods.iosqeLink();
244 static final int IOSQE_IO_DRAIN = NativeStaticallyReferencedJniMethods.iosqeDrain();
245 static final int MSG_DONTWAIT = NativeStaticallyReferencedJniMethods.msgDontwait();
246 static final int MSG_FASTOPEN = NativeStaticallyReferencedJniMethods.msgFastopen();
247 static final int SOL_UDP = NativeStaticallyReferencedJniMethods.solUdp();
248 static final int UDP_SEGMENT = NativeStaticallyReferencedJniMethods.udpSegment();
249 private static final int TFO_ENABLED_CLIENT_MASK = 0x1;
250 private static final int TFO_ENABLED_SERVER_MASK = 0x2;
251 private static final int TCP_FASTOPEN_MODE = NativeStaticallyReferencedJniMethods.tcpFastopenMode();
252
253
254
255
256 static final boolean IS_SUPPORTING_TCP_FASTOPEN_CLIENT =
257 (TCP_FASTOPEN_MODE & TFO_ENABLED_CLIENT_MASK) == TFO_ENABLED_CLIENT_MASK;
258
259
260
261
262 static final boolean IS_SUPPORTING_TCP_FASTOPEN_SERVER =
263 (TCP_FASTOPEN_MODE & TFO_ENABLED_SERVER_MASK) == TFO_ENABLED_SERVER_MASK;
264
265 private static final int[] REQUIRED_IORING_OPS = {
266 IORING_OP_POLL_ADD,
267 IORING_OP_TIMEOUT,
268 IORING_OP_ACCEPT,
269 IORING_OP_READ,
270 IORING_OP_WRITE,
271 IORING_OP_POLL_REMOVE,
272 IORING_OP_CONNECT,
273 IORING_OP_CLOSE,
274 IORING_OP_WRITEV,
275 IORING_OP_SENDMSG,
276 IORING_OP_RECVMSG,
277 IORING_OP_ASYNC_CANCEL,
278 IORING_OP_RECV,
279 IORING_OP_NOP,
280 IORING_OP_SHUTDOWN,
281 IORING_OP_SEND
282 };
283
284 static RingBuffer createRingBuffer() {
285 return createRingBuffer(DEFAULT_RING_SIZE);
286 }
287
288 static RingBuffer createRingBuffer(int ringSize) {
289 ObjectUtil.checkPositive(ringSize, "ringSize");
290 long[][] values = ioUringSetup(ringSize);
291 assert values.length == 2;
292 long[] completionQueueArgs = values[1];
293 assert completionQueueArgs.length == 9;
294 CompletionQueue completionQueue = new CompletionQueue(
295 completionQueueArgs[0],
296 completionQueueArgs[1],
297 completionQueueArgs[2],
298 completionQueueArgs[3],
299 completionQueueArgs[4],
300 completionQueueArgs[5],
301 (int) completionQueueArgs[6],
302 completionQueueArgs[7],
303 (int) completionQueueArgs[8]);
304 long[] submissionQueueArgs = values[0];
305 assert submissionQueueArgs.length == 11;
306 SubmissionQueue submissionQueue = new SubmissionQueue(
307 submissionQueueArgs[0],
308 submissionQueueArgs[1],
309 submissionQueueArgs[2],
310 submissionQueueArgs[3],
311 submissionQueueArgs[4],
312 submissionQueueArgs[5],
313 submissionQueueArgs[6],
314 submissionQueueArgs[7],
315 (int) submissionQueueArgs[8],
316 submissionQueueArgs[9],
317 (int) submissionQueueArgs[10],
318 completionQueue);
319 return new RingBuffer(submissionQueue, completionQueue);
320 }
321
322 static void checkAllIOSupported(int ringFd) {
323 if (!ioUringProbe(ringFd, REQUIRED_IORING_OPS)) {
324 throw new UnsupportedOperationException("Not all operations are supported: "
325 + Arrays.toString(REQUIRED_IORING_OPS));
326 }
327 }
328
329 static boolean isIOUringCqeFSockNonEmptySupported(int ringFd) {
330
331 return ioUringProbe(ringFd, new int[] { Native.IORING_OP_SOCKET });
332 }
333
334 static void checkKernelVersion(String kernelVersion) {
335 boolean enforceKernelVersion = SystemPropertyUtil.getBoolean(
336 "io.netty5.transport.iouring.enforceKernelVersion", true);
337 boolean kernelSupported = checkKernelVersion0(kernelVersion);
338 if (!kernelSupported) {
339 if (enforceKernelVersion) {
340 throw new UnsupportedOperationException(
341 "you need at least kernel version 5.9, current kernel version: " + kernelVersion);
342 } else {
343 logger.debug("Detected kernel " + kernelVersion + " does not match minimum version of 5.9, " +
344 "trying to use io_uring anyway");
345 }
346 }
347 }
348
349 private static boolean checkKernelVersion0(String kernelVersion) {
350 String[] versionComponents = kernelVersion.split("\\.");
351 if (versionComponents.length < 3) {
352 return false;
353 }
354
355 int major;
356 try {
357 major = Integer.parseInt(versionComponents[0]);
358 } catch (NumberFormatException e) {
359 return false;
360 }
361
362 if (major <= 4) {
363 return false;
364 }
365 if (major > 5) {
366 return true;
367 }
368
369 int minor;
370 try {
371 minor = Integer.parseInt(versionComponents[1]);
372 } catch (NumberFormatException e) {
373 return false;
374 }
375
376 return minor >= 9;
377 }
378
379 private static native boolean ioUringProbe(int ringFd, int[] ios);
380 private static native long[][] ioUringSetup(int entries);
381
382 static native int ioUringEnter(int ringFd, int toSubmit, int minComplete, int flags);
383
384 static native void eventFdWrite(int fd, long value);
385
386 static FileDescriptor newBlockingEventFd() {
387 return new FileDescriptor(blockingEventFd());
388 }
389
390 static native void ioUringExit(long submissionQueueArrayAddress, int submissionQueueRingEntries,
391 long submissionQueueRingAddress, int submissionQueueRingSize,
392 long completionQueueRingAddress, int completionQueueRingSize,
393 int ringFd);
394
395 private static native int blockingEventFd();
396
397
398 static native int createFile(String name);
399
400 private static native int registerUnix();
401
402 static native long cmsghdrData(long hdrAddr);
403
404 static native String kernelVersion();
405
406 private Native() {
407
408 }
409
410
411 private static void loadNativeLibrary() {
412 String name = PlatformDependent.normalizedOs().toLowerCase(Locale.ROOT).trim();
413 if (!name.startsWith("linux")) {
414 throw new IllegalStateException("Only supported on Linux");
415 }
416 String staticLibName = "netty_transport_native_io_uring";
417 String sharedLibName = staticLibName + '_' + PlatformDependent.normalizedArch();
418 ClassLoader cl = PlatformDependent.getClassLoader(Native.class);
419 try {
420 NativeLibraryLoader.load(sharedLibName, cl);
421 } catch (UnsatisfiedLinkError e1) {
422 try {
423 NativeLibraryLoader.load(staticLibName, cl);
424 logger.info("Failed to load io_uring");
425 } catch (UnsatisfiedLinkError e2) {
426 ThrowableUtil.addSuppressed(e1, e2);
427 throw e1;
428 }
429 }
430 }
431 }