diff --git a/.gitignore b/.gitignore index fe0d5f4aa..5822f761f 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,7 @@ dist test *_test build +build-* config.mk hconfig.h html/uploads diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e58fad02..8d8502e34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,10 @@ option(WITH_MBEDTLS "with mbedtls library" OFF) option(WITH_KCP "compile event/kcp" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Linux") + option(WITH_IO_URING "with io_uring" OFF) +endif() + if(WIN32 OR MINGW) option(WITH_WEPOLL "compile event/wepoll -> use iocp" ON) option(ENABLE_WINDUMP "Windows MiniDumpWriteDump" OFF) @@ -100,6 +104,9 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hconfig.h.in ${CMAKE_CURRENT_SOURCE_D # see Makefile.in set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD_REQUIRED True) +if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_C_COMPILER_ID STREQUAL "GNU") + set(CMAKE_C_EXTENSIONS ON) +endif() set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED True) @@ -165,6 +172,13 @@ if(WITH_MBEDTLS) set(LIBS ${LIBS} mbedtls mbedx509 mbedcrypto) endif() +if(WITH_IO_URING) + if(NOT CMAKE_SYSTEM_NAME STREQUAL "Linux") + message(FATAL_ERROR "WITH_IO_URING is only supported on Linux because liburing is Linux-only.") + endif() + set(LIBS ${LIBS} uring) +endif() + if(WIN32 OR MINGW) add_definitions(-DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS -D_WIN32_WINNT=0x0600) set(LIBS ${LIBS} secur32 crypt32 winmm iphlpapi ws2_32) diff --git a/Makefile.in b/Makefile.in index cceb29d8f..1b807ed38 100644 --- a/Makefile.in +++ b/Makefile.in @@ -87,8 +87,12 @@ endif endif ifeq ($(findstring -std, $(CFLAGS)), ) +ifeq ($(OS), Linux) +override CFLAGS += -std=gnu99 +else override CFLAGS += -std=c99 endif +endif ifeq ($(findstring -std, $(CXXFLAGS)), ) override CXXFLAGS += -std=c++11 @@ -193,6 +197,13 @@ endif endif endif +ifeq ($(OS), Linux) +ifeq ($(WITH_IO_URING), yes) + LDFLAGS += -luring +endif +endif + + LDFLAGS += $(addprefix -L, $(LIBDIRS)) LDFLAGS += $(addprefix -l, $(LIBS)) diff --git a/config.ini b/config.ini index e3145a666..f9a5a43b4 100644 --- a/config.ini +++ b/config.ini @@ -38,3 +38,6 @@ WITH_MBEDTLS=no # rudp WITH_KCP=no + +# event +WITH_IO_URING=no diff --git a/configure b/configure index 535599d0e..396762389 100755 --- a/configure +++ b/configure @@ -45,6 +45,9 @@ dependencies: rudp: --with-kcp compile with kcp? (DEFAULT: $WITH_KCP) +event: + --with-io_uring compile with io_uring? (DEFAULT: $WITH_IO_URING) + END } @@ -298,6 +301,7 @@ option=WITH_MBEDTLS && check_option option=ENABLE_UDS && check_option option=USE_MULTIMAP && check_option option=WITH_KCP && check_option +option=WITH_IO_URING && check_option # end confile cat << END >> $confile diff --git a/docs/PLAN.md b/docs/PLAN.md index b389a39a1..69dac01b9 100644 --- a/docs/PLAN.md +++ b/docs/PLAN.md @@ -1,7 +1,7 @@ ## Done - base: cross platfrom infrastructure -- event: select/poll/epoll/wepoll/kqueue/port +- event: select/poll/epoll/wepoll/kqueue/port/io_uring - ssl: openssl/gnutls/mbedtls/wintls/appletls - rudp: KCP - evpp: c++ EventLoop interface similar to muduo and evpp @@ -22,7 +22,6 @@ - hrpc = libhv + protobuf - rudp: FEC, ARQ, UDT, QUIC - kcptun -- have a taste of io_uring - coroutine - cppsocket.io - IM-libhv diff --git a/event/README.md b/event/README.md index d92834284..308aabeb3 100644 --- a/event/README.md +++ b/event/README.md @@ -11,6 +11,7 @@ ├── select.c EVENT_SELECT实现 ├── poll.c EVENT_POLL实现 ├── epoll.c EVENT_EPOLL实现 (for OS_LINUX) +├── io_uring.c EVENT_IO_URING实现 (for OS_LINUX, with liburing) ├── iocp.c EVENT_IOCP实现 (for OS_WIN) ├── kqueue.c EVENT_KQUEUE实现(for OS_BSD/OS_MAC) ├── evport.c EVENT_PORT实现 (for OS_SOLARIS) diff --git a/event/hloop.c b/event/hloop.c index d0e042c3e..5999daf90 100644 --- a/event/hloop.c +++ b/event/hloop.c @@ -771,6 +771,8 @@ const char* hio_engine() { return "iocp"; #elif defined(EVENT_PORT) return "evport"; +#elif defined(EVENT_IO_URING) + return "io_uring"; #else return "noevent"; #endif diff --git a/event/hloop.h b/event/hloop.h index f070fea26..3e268fc3b 100644 --- a/event/hloop.h +++ b/event/hloop.h @@ -233,6 +233,8 @@ const char* hio_engine() { return "iocp"; #elif defined(EVENT_PORT) return "evport"; +#elif defined(EVENT_IO_URING) + return "io_uring"; #else return "noevent"; #endif diff --git a/event/io_uring.c b/event/io_uring.c new file mode 100644 index 000000000..0b677d126 --- /dev/null +++ b/event/io_uring.c @@ -0,0 +1,232 @@ +#include "iowatcher.h" + +#ifdef EVENT_IO_URING +#include "hplatform.h" +#include "hdef.h" +#include "hevent.h" + +#include +#include + +#define IO_URING_ENTRIES 1024 +#define IO_URING_CANCEL_TAG ((void*)(uintptr_t)-1) + +typedef struct io_uring_ctx_s { + struct io_uring ring; + int nfds; +} io_uring_ctx_t; + +int iowatcher_init(hloop_t* loop) { + if (loop->iowatcher) return 0; + io_uring_ctx_t* ctx; + HV_ALLOC_SIZEOF(ctx); + int ret = io_uring_queue_init(IO_URING_ENTRIES, &ctx->ring, 0); + if (ret < 0) { + HV_FREE(ctx); + return ret; + } + ctx->nfds = 0; + loop->iowatcher = ctx; + return 0; +} + +int iowatcher_cleanup(hloop_t* loop) { + if (loop->iowatcher == NULL) return 0; + io_uring_ctx_t* ctx = (io_uring_ctx_t*)loop->iowatcher; + io_uring_queue_exit(&ctx->ring); + HV_FREE(loop->iowatcher); + return 0; +} + +static struct io_uring_sqe* io_uring_get_sqe_safe(struct io_uring* ring) { + struct io_uring_sqe* sqe = io_uring_get_sqe(ring); + if (sqe == NULL) { + // SQ is full, flush pending submissions and retry + io_uring_submit(ring); + sqe = io_uring_get_sqe(ring); + } + return sqe; +} + +int iowatcher_add_event(hloop_t* loop, int fd, int events) { + if (loop->iowatcher == NULL) { + int ret = iowatcher_init(loop); + if (ret < 0) { + return ret; + } + } + io_uring_ctx_t* ctx = (io_uring_ctx_t*)loop->iowatcher; + hio_t* io = loop->ios.ptr[fd]; + + unsigned poll_mask = 0; + // pre events + if (io->events & HV_READ) { + poll_mask |= POLLIN; + } + if (io->events & HV_WRITE) { + poll_mask |= POLLOUT; + } + // now events + if (events & HV_READ) { + poll_mask |= POLLIN; + } + if (events & HV_WRITE) { + poll_mask |= POLLOUT; + } + + struct io_uring_sqe* sqe; + if (io->events != 0) { + // Cancel the existing poll request first + sqe = io_uring_get_sqe_safe(&ctx->ring); + if (sqe == NULL) return -1; + io_uring_prep_poll_remove(sqe, (uint64_t)fd); + io_uring_sqe_set_data(sqe, IO_URING_CANCEL_TAG); + } else { + ctx->nfds++; + } + + // Add poll for the combined events + sqe = io_uring_get_sqe_safe(&ctx->ring); + if (sqe == NULL) return -1; + io_uring_prep_poll_add(sqe, fd, poll_mask); + io_uring_sqe_set_data(sqe, (void*)(uintptr_t)fd); + + io_uring_submit(&ctx->ring); + return 0; +} + +int iowatcher_del_event(hloop_t* loop, int fd, int events) { + io_uring_ctx_t* ctx = (io_uring_ctx_t*)loop->iowatcher; + if (ctx == NULL) return 0; + hio_t* io = loop->ios.ptr[fd]; + + // Calculate remaining events + unsigned poll_mask = 0; + // pre events + if (io->events & HV_READ) { + poll_mask |= POLLIN; + } + if (io->events & HV_WRITE) { + poll_mask |= POLLOUT; + } + // now events + if (events & HV_READ) { + poll_mask &= ~POLLIN; + } + if (events & HV_WRITE) { + poll_mask &= ~POLLOUT; + } + + // Cancel existing poll + struct io_uring_sqe* sqe = io_uring_get_sqe_safe(&ctx->ring); + if (sqe == NULL) return -1; + io_uring_prep_poll_remove(sqe, (uint64_t)fd); + io_uring_sqe_set_data(sqe, IO_URING_CANCEL_TAG); + + if (poll_mask == 0) { + ctx->nfds--; + } else { + // Re-add with remaining events + sqe = io_uring_get_sqe_safe(&ctx->ring); + if (sqe == NULL) return -1; + io_uring_prep_poll_add(sqe, fd, poll_mask); + io_uring_sqe_set_data(sqe, (void*)(uintptr_t)fd); + } + + io_uring_submit(&ctx->ring); + return 0; +} + +int iowatcher_poll_events(hloop_t* loop, int timeout) { + io_uring_ctx_t* ctx = (io_uring_ctx_t*)loop->iowatcher; + if (ctx == NULL) return 0; + if (ctx->nfds == 0) return 0; + + struct __kernel_timespec ts; + struct __kernel_timespec* tp = NULL; + if (timeout != INFINITE) { + ts.tv_sec = timeout / 1000; + ts.tv_nsec = (timeout % 1000) * 1000000LL; + tp = &ts; + } + + struct io_uring_cqe* cqe; + int ret; + if (tp) { + ret = io_uring_wait_cqe_timeout(&ctx->ring, &cqe, tp); + } else { + ret = io_uring_wait_cqe(&ctx->ring, &cqe); + } + if (ret < 0) { + if (ret == -ETIME || ret == -EINTR) { + return 0; + } + perror("io_uring_wait_cqe"); + return ret; + } + + int nevents = 0; + int sqe_queued = 0; + unsigned nready = io_uring_cq_ready(&ctx->ring); + unsigned i; + for (i = 0; i < nready; ++i) { + if (io_uring_peek_cqe(&ctx->ring, &cqe) != 0) break; + void* data = io_uring_cqe_get_data(cqe); + if (data == IO_URING_CANCEL_TAG) { + io_uring_cqe_seen(&ctx->ring, cqe); + continue; + } + + int fd = (int)(uintptr_t)data; + if (fd < 0 || fd >= loop->ios.maxsize) { + io_uring_cqe_seen(&ctx->ring, cqe); + continue; + } + hio_t* io = loop->ios.ptr[fd]; + if (io == NULL) { + io_uring_cqe_seen(&ctx->ring, cqe); + continue; + } + + if (cqe->res < 0) { + // Poll request failed: notify registered events, or both if none registered + io->revents |= (io->events ? io->events : HV_RDWR); + EVENT_PENDING(io); + ++nevents; + } else { + int revents = cqe->res; + if (revents & (POLLIN | POLLHUP | POLLERR)) { + io->revents |= HV_READ; + } + if (revents & (POLLOUT | POLLHUP | POLLERR)) { + io->revents |= HV_WRITE; + } + if (io->revents) { + EVENT_PENDING(io); + ++nevents; + } + } + + io_uring_cqe_seen(&ctx->ring, cqe); + + // io_uring POLL_ADD is one-shot, re-arm for the same events + unsigned remask = 0; + if (io->events & HV_READ) remask |= POLLIN; + if (io->events & HV_WRITE) remask |= POLLOUT; + if (remask) { + struct io_uring_sqe* sqe = io_uring_get_sqe_safe(&ctx->ring); + if (sqe) { + io_uring_prep_poll_add(sqe, fd, remask); + io_uring_sqe_set_data(sqe, (void*)(uintptr_t)fd); + sqe_queued = 1; + } + } + } + + if (sqe_queued) { + io_uring_submit(&ctx->ring); + } + + return nevents; +} +#endif diff --git a/event/iowatcher.h b/event/iowatcher.h index 1c6853d6b..5432bc9df 100644 --- a/event/iowatcher.h +++ b/event/iowatcher.h @@ -10,6 +10,7 @@ !defined(EVENT_KQUEUE) && \ !defined(EVENT_IOCP) && \ !defined(EVENT_PORT) && \ + !defined(EVENT_IO_URING) && \ !defined(EVENT_NOEVENT) #ifdef OS_WIN #if WITH_WEPOLL @@ -18,7 +19,11 @@ #define EVENT_POLL // WSAPoll #endif #elif defined(OS_LINUX) -#define EVENT_EPOLL + #if WITH_IO_URING + #define EVENT_IO_URING + #else + #define EVENT_EPOLL + #endif #elif defined(OS_MAC) #define EVENT_KQUEUE #elif defined(OS_BSD) diff --git a/hconfig.h.in b/hconfig.h.in index 700f9e7dc..4880666a8 100644 --- a/hconfig.h.in +++ b/hconfig.h.in @@ -98,5 +98,6 @@ #cmakedefine WITH_WEPOLL 1 #cmakedefine WITH_KCP 1 +#cmakedefine WITH_IO_URING 1 #endif // HV_CONFIG_H_