diff --git a/CMakeLists.txt b/CMakeLists.txt index 8eb3e94..8026f95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -208,6 +208,28 @@ if (WolfSSL_FOUND) target_compile_definitions(boost_corosio_wolfssl PUBLIC BOOST_COROSIO_HAS_WOLFSSL) endif () +#------------------------------------------------- +# +# OpenSSL +# +#------------------------------------------------- +find_package(OpenSSL) +if (OpenSSL_FOUND) + file(GLOB_RECURSE BOOST_COROSIO_OPENSSL_HEADERS CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/include/boost/corosio/openssl/*.hpp") + file(GLOB_RECURSE BOOST_COROSIO_OPENSSL_SOURCES CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/src/openssl/src/*.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/openssl/src/*.cpp") + source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}/include/boost/corosio/openssl" PREFIX "include" FILES ${BOOST_COROSIO_OPENSSL_HEADERS}) + source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}/src/openssl/src" PREFIX "src" FILES ${BOOST_COROSIO_OPENSSL_SOURCES}) + add_library(boost_corosio_openssl ${BOOST_COROSIO_OPENSSL_HEADERS} ${BOOST_COROSIO_OPENSSL_SOURCES}) + add_library(Boost::corosio_openssl ALIAS boost_corosio_openssl) + boost_corosio_setup_properties(boost_corosio_openssl) + target_link_libraries(boost_corosio_openssl PUBLIC boost_corosio) + target_link_libraries(boost_corosio_openssl PRIVATE OpenSSL::SSL OpenSSL::Crypto) + target_compile_definitions(boost_corosio_openssl PUBLIC BOOST_COROSIO_HAS_OPENSSL) +endif () + #------------------------------------------------- # # Tests diff --git a/CMakePresets.json b/CMakePresets.json index ca4abfb..e69de29 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,16 +0,0 @@ -{ - "version": 8, - "configurePresets": [ - { - "name": "Custom configure preset", - "displayName": "Custom configure preset", - "description": "Sets Ninja generator, build and install directory", - "generator": "Ninja", - "binaryDir": "${sourceDir}/out/build/${presetName}", - "cacheVariables": { - "CMAKE_BUILD_TYPE": "Debug", - 
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" - } - } - ] -} \ No newline at end of file diff --git a/context/affine-awaitables.md b/context/affine-awaitables.md deleted file mode 100644 index e360415..0000000 --- a/context/affine-awaitables.md +++ /dev/null @@ -1,584 +0,0 @@ -**Document number:** PXXXXR0 -**Date:** 2025-12-30 -**Reply-to:** Vinnie Falco \ -**Audience:** SG1, LEWG - ---- - -# Affine Awaitables : Zero-Overhead Scheduler Affinity - -## Abstract - -This document proposes a minimal protocol for zero-overhead scheduler affinity in C++ coroutines. By introducing the `dispatcher` and -`affine_awaitable` concepts, awaitables can opt into zero-allocation scheduler affinity without requiring the full sender/receiver protocol. The protocol is library-implementable today; standardization would bless the concepts and enable ecosystem-wide adoption. - ---- - -## 1. The Lost Context Problem - -Consider a common scenario: a coroutine running on the UI thread needs to fetch data from the network. - -```cpp -task ui_handler() { // Runs on UI thread - auto data = co_await fetch(); // fetch() completes on network thread - update_ui(data); // BUG: Where are we now? -} -``` - -When `fetch()` completes, the network subsystem resumes our coroutine—but on the *network* thread, not the UI thread. The call to `update_ui()` races, crashes, or corrupts state. This is the scheduler affinity problem: after any `co_await`, how does a coroutine resume on its *home* scheduler? - -The challenge deepens when coroutines call other coroutines: - -```cpp -task fetch_and_process() { // Started on UI thread - auto raw = co_await network_read(); // Completes on network thread - auto parsed = co_await parse(raw); // Where does parse() run? - co_return transform(parsed); // Where does this run? -} -``` - -Every suspension point is an opportunity to get lost. - ---- - -## 2. 
Today's Solutions, and Costs - -P3552 proposes coroutine tasks with scheduler affinity—ensuring a coroutine resumes on its designated scheduler after any `co_await`. Several approaches exist: - -### 2.1 Manual Discipline - -```cpp -task ui_handler() { - auto data = co_await fetch(); - co_await resume_on(ui_scheduler); // Programmer must remember - update_ui(data); -} -``` - -**Problem:** Error-prone. Every `co_await` is a potential bug. - -### 2.2 Sender Composition - -```cpp -task ui_handler() { - auto data = co_await continues_on(fetch(), ui_scheduler); - update_ui(data); // Guaranteed: UI thread -} -``` - -**Problem:** Only works with P2300 senders, excluding the ecosystem of awaitables (Asio, custom libraries). This path makes unproven assumptions that senders will be ideal for all future, undiscovered use-cases. - -### 2.3 Universal Trampoline - -Our helper `make_affine` wraps any awaitable in a coroutine to provide scheduler affinity. The trampoline awaits the non-affine awaitable, then dispatches the continuation through the caller's dispatcher. This enables scheduler affinity for awaitables that do not participate in the affine protocol. The reference implementation includes `[[clang::coro_await_elidable]]` to enable HALO optimization when possible; see [`make_affine.hpp`](https://github.com/cppalliance/corosio/blob/master/include/capy/make_affine.hpp). - -> **Note:** `make_affine` is not proposed for standardization. It is provided as an example implementation technique in the reference implementation. This proposal only standardizes the protocol itself (§4) and the concepts that detect it (§4.1, §4.2). - -```cpp -// Works with ANY awaitable -auto data = co_await make_affine(fetch(), dispatcher); -``` - -**Problem:** Typically allocates on every `co_await`. The trampoline coroutine handle escapes to the dispatcher, which may prevent HALO optimization from applying. HALO is unreliable across compilers. 
- -> **Note on HALO reliability:** Testing shows HALO is far more limited than often assumed. MSVC (19.50) does not implement HALO at all. Clang elides only the *outermost* coroutine frame; nested coroutines still allocate. The affine protocol provides the only *reliable* zero-allocation path. - -### 2.4 Why This Matters Now - -P3552 (`std::execution::task`) is about to be standardized and makes significant assumptions about sender/receiver's universal applicability to asynchronous workloads. The framework has demonstrable value for GPU and parallel workloads, where its design strengths align with the problem domain. However, for CPU-bound I/O workloads such as networking, the committee has not yet explored the use case. Early studies by the authors suggest that sender APIs may not be optimal for networking's particular needs. - -Our design is simpler, less risky, and provides a foundation upon which a standard task type can be built. The broader ecosystem which does not currently use senders, and future user code which may not need senders, would benefit from our proposal: - -**Ecosystem compatibility.** Boost.Asio has active coroutine support. Libraries like libcoro, folly::coro, and Bloomberg's Quantum are actively maintained and power real systems today. Countless custom awaitables exist in proprietary codebases. These libraries represent proven, stable infrastructure that developers rely on. Affine awaitables let this infrastructure benefit from scheduler affinity without requiring conversion to the sender/receiver protocol. - -**Incremental adoption.** Not every codebase needs the full compositional power of senders. Many applications simply need coroutines that resume on the correct thread. The affine protocol (~10 lines) provides a lower barrier to entry than the full sender/receiver protocol (~100+ lines), allowing teams to adopt scheduler affinity immediately and evolve toward senders as their needs grow. 
- -**No-regret design.** Affine awaitables integrate seamlessly with P2300—senders flow through the same `await_transform` and receive optimal handling. Whether senders become ubiquitous or adoption is slower than expected, affine awaitables ensure scheduler affinity is available today and remain useful for custom awaitables and legacy integration. - -**Layered abstraction.** The dispatcher concept is more general than the scheduler concept—every scheduler can produce a dispatcher, but not every dispatcher requires the full sender/receiver protocol. This positions affine awaitables as a foundation that sender-based designs can build upon (see §11). - ---- - -## 3. The Solution: One Parameter - -The allocation in `make_affine` happens because it requires an extra coroutine (the trampoline) to intercept and redirect resumption. The awaitable doesn't know where to resume—it hands back control blindly, and the trampoline coroutine must be created to handle the dispatch, with whatever overhead that brings. - -The fix is simple: tell the awaitable where to resume. - -```cpp -// Standard: awaitable is blind -void await_suspend(std::coroutine_handle<> h); - -// Extended: awaitable knows the dispatcher -void await_suspend(std::coroutine_handle<> h, Dispatcher const& d); -``` - -One additional parameter eliminates the allocation. - -The awaitable receives the dispatcher and uses it directly: - -```cpp -template -auto await_suspend(std::coroutine_handle<> h, Dispatcher const& d) { - return d(h); // Resume through dispatcher, return handle for symmetric transfer -} -``` - -No trampoline. No allocation. The awaitable handles affinity internally. - -### 3.1 Propagation - -However, `await_suspend(h, d)` only tells the awaitable how to be dispatched—it does not propagate affinity through nested coroutine chains. 
Consider: - -```cpp -task outer() { - auto result = co_await inner(); // inner() is also a task - co_return result; -} - -task inner() { - auto data = co_await fetch(); // Resumes on unknown context - co_return data; -} -``` - -If `inner()` is a coroutine task that doesn't implement the propagation rules, affinity is lost when `inner()` awaits other operations. The `fetch()` operation completes and resumes `inner()` on an unknown context, breaking the affinity chain. Our proposal includes guidance for propagating affinity that does not require any additional standard library support. It is a set of rules that awaitable authors can follow to propagate affinity at zero cost for any affine awaitable. By offering rules instead of standardese, authors have flexibility in implementation (see §4.4). - ---- - -## 4. The Affine Awaitable Protocol - -The protocol outlined in this section gives authors tools to solve both aspects of the lost context problem: awaitable affinity, and affinity propagation. - -- **As an Awaitable**: You implement the protocol to respect the Caller's affinity (resume them where they want). - -- **As a Task Type**: You implement the promise logic to enforce Your affinity (resume yourself where you want). - -This protocol is fully implementable as a library today; standardization would bless the concepts and reduce boilerplate, without requiring `std::execution::task`. Dispatchers can also carry extra policy (e.g., priority save/restore) without affecting the core protocol. - -### 4.1 Dispatcher Concept - -A **dispatcher** accepts a coroutine handle for resumption and returns a coroutine handle for symmetric transfer: - -```cpp -template -concept dispatcher = requires(D const& d, std::coroutine_handle
<>
h) { - { d(h) } -> std::convertible_to>; -}; -``` - -*Remarks:* The dispatcher must return a `std::coroutine_handle<>` (or convertible type) to enable symmetric transfer. Calling `d(h)` schedules `h` for resumption (typically on a specific execution context) and returns a coroutine handle that the caller may use for symmetric transfer. The dispatcher must be const-callable (logical constness), enabling thread-safe concurrent dispatch from multiple coroutines. Since `std::coroutine_handle
<>
` has `operator()` which invokes `resume()`, the handle itself is callable and can be dispatched directly. - -*Lifetime:* In `await_suspend`, the dispatcher is received by const lvalue reference. The callee treats it as *borrowed for the duration of the await* (do not retain past completion). Ownership and lifetime are the caller's responsibility. The caller may store the dispatcher by value (if owning) or by pointer/reference (if externally owned) but must present it to callees as a const lvalue reference. - -### 4.2 Affine Awaitable Concept - -An awaitable is *affine* if it accepts a dispatcher in `await_suspend`: - -```cpp -template -concept affine_awaitable = - dispatcher && - requires(A a, std::coroutine_handle
<>
h, D const& d) { - a.await_suspend(h, d); - }; -``` - -*Remarks:* This concept detects awaitables that participate in the affine protocol by providing the extended `await_suspend(h, d)` overload. The dispatcher is passed by const lvalue reference, and the awaitable must use it to resume the caller. - -### 4.3 Awaitable Requirements (callee role) - -An awaitable **participates in the affinity protocol** if it provides an overload: - -```cpp -template -auto await_suspend(std::coroutine_handle<> h, Dispatcher const& d) { - return d(h); // Possible implementation: resume caller via dispatcher, return handle for symmetric transfer -} -``` - -Semantics: -- The awaitable must use the dispatcher `d` to resume the caller, e.g. `return d(h);`. -- The dispatcher returns a coroutine handle that `await_suspend` may return for symmetric transfer. -- It may run its own work on any context; only resumption must use `d`. - -### 4.4 Task/Promise Requirements (caller role) - -A task type (its promise) **propagates affinity** if it: - -1) **Stores the caller's dispatcher.** - - Provides a way to set/hold a dispatcher instance for the lifetime of the promise. - -2) **Forwards the dispatcher on every await.** - - In `await_transform`, when awaiting `A`, pass the stored dispatcher to the awaited object via an affine awaiter: - ```cpp - // dispatcher_handle: whatever you store for the caller's dispatcher (pointer or reference) - return affine_awaiter{std::forward(a), dispatcher_handle}; - ``` - - The callee receives it as `Dispatcher const&` in `await_suspend(h, d)`. - - If `A` is not affine and you still want affinity, wrap it (e.g. `make_affine(A, dispatcher)`) or reject it at compile time. - -3) **Provides the affine await path for its own awaitability.** - - `await_suspend(caller, dispatcher)` (affine, dispatcher available) that stores dispatcher + continuation before resuming the task's coroutine. 
- - Optionally provides `await_suspend(caller)` (legacy, no dispatcher) for backward compatibility with non-affine callers. Strict-mode tasks may omit this overload, rejecting non-affine callers at compile time. - - In the affine path, `await_suspend(caller, d)` typically: - - Stores the continuation handle (caller) and the dispatcher `d` in the promise, then - - Returns the task's coroutine handle to start execution, enabling later forwarding (via `await_transform`) and final resumption via the stored dispatcher. - -4) **Final resumption uses the dispatcher when set.** - - In `final_suspend`, if a dispatcher is stored, resume the continuation via that dispatcher; otherwise use direct symmetric transfer. - -### 4.5 Propagation Rule - -- The dispatcher is set once at the top-level task (e.g., `run_on(ex, task)`). -- Each `co_await` forwards the same dispatcher to the awaited object. -- Any awaited object that implements `await_suspend(h, d)` uses that dispatcher to resume its caller, preserving affinity through arbitrary nesting. -- If an awaited object is non-affine and not adapted, affinity may be lost at that point. A trampoline (e.g., `make_affine`) can restore it; alternatively, implementations may reject non-affine awaits at compile time. - ---- - -## 5. Reference Implementation - -The protocol defined in §4 can be implemented entirely as a library. A complete reference implementation is available at: - -https://github.com/cppalliance/corosio - -The implementation includes: - -- **`task`** — A coroutine task type implementing the affine awaitable protocol -- **`make_affine`** — A trampoline wrapper for legacy awaitables that don't implement the protocol -- **`any_dispatcher`** — A type-erased dispatcher for runtime polymorphism -- **`run_async`** — Entry point for launching detached tasks with a dispatcher -- **`run_on`** — Binds a task to execute on a specific dispatcher - ---- - -## 6. 
Benefits - -**Zero-Overhead Affinity** - -Both P2300 senders and affine awaitables achieve zero allocation per `co_await`—crucially, without depending on HALO. However, senders require conversion of the entire awaitable ecosystem (~100+ lines per awaitable, high expertise with the sender/receiver protocol), while affine awaitables work with existing awaitables through a single overload (~10 lines, low expertise). Legacy awaitables that don't participate in the protocol lose affinity entirely, while `make_affine` provides a fallback for foreign awaitables. - -**Mitigating Risk** - -While senders demonstrate clear value for GPU and parallel workloads, networking represents an unexplored use case in committee discussions. Early studies suggest sender APIs may not align optimally with networking's particular needs—high-frequency, low-latency I/O operations with different composition patterns than parallel algorithms. Adopting senders as the sole path commits the ecosystem to an unproven design for networking workloads. Affine awaitables provide a proven, minimal foundation that works today and remains valuable regardless of sender adoption outcomes, offering lower risk for networking applications. - -**Implementation Trade-offs** - -The affine protocol provides zero-overhead affinity with minimal implementation burden, making it accessible to the entire awaitable ecosystem. Senders offer pipe composition but require ecosystem conversion; affine awaitables provide affinity propagation without requiring conversion. Both approaches require opt-in, but affine awaitables integrate seamlessly with existing code—Boost.Asio, libcoro, folly::coro, and countless custom awaitables can adopt the protocol incrementally. The minimal cost (~10 lines per awaitable) enables immediate adoption while preserving flexibility for future evolution toward senders if desired. - -**Incremental Adoption Path** - -The protocol enables incremental adoption: - -1. 
**Library authors:** Add `await_suspend(h, dispatcher)` overload to existing awaitables. This is non-breaking—existing code continues to work, and affine-aware tasks gain zero-allocation affinity. - -2. **Task authors:** Implement the protocol requirements (§4.4) in their promise types to propagate affinity through `co_await` chains. - -3. **Foreign awaitables:** For awaitables that cannot be modified (third-party libraries, legacy code), authors can use `make_affine` to opt them into scheduler affinity. This tradeoff may be acceptable when the awaitable is infrequently used or when scheduler affinity is more valuable than the allocation cost. - -4. **Ecosystem evolution:** As more awaitables adopt the protocol, the benefits compound. Libraries can choose to require affine awaitables (rejecting non-affine at compile time) or accept non-affine awaitables with appropriate fallbacks. - -The protocol is minimal (~10 lines per awaitable), library-implementable, and provides a clear migration path from today's awaitable ecosystem to zero-overhead scheduler affinity. - -> **Note (non-normative):** Future awaitables added to the standard library should implement the affine awaitable protocol to enable zero-overhead scheduler affinity. This ensures that standard library awaitables work optimally with scheduler-affine task types. - -### 6.1 Implementation Patterns (Non-Normative) - -The `affine_awaitable` concept enables multiple implementation patterns, allowing library authors to choose the policy that best fits their use case. This section describes non-normative implementation guidance. - -#### 6.1.1 Non-Breaking Overload Addition - -Adding `await_suspend(h, d)` alongside existing `await_suspend(h)` is completely non-breaking. 
The compiler automatically selects the appropriate overload based on availability: - -| Context | Awaitable Type | Overload Selected | -|---------|---------------|-------------------| -| Legacy awaitable (only `await_suspend(h)`) | Non-affine | `await_suspend(h)` (legacy path) | -| Affine awaitable (both overloads) | Affine | `await_suspend(h, d)` (affine path) | -| Legacy code awaiting affine awaitable | Affine | `await_suspend(h)` (backward compatible) | -| Affine-aware code awaiting legacy awaitable | Non-affine | `await_suspend(h)` (fallback to legacy) | - -This ensures that existing code continues to work unchanged, while new affine-aware code can opt into zero-allocation affinity. - -#### 6.1.2 Tiered vs. Strict: Same Primitives, Different Policies - -The `affine_awaitable` concept enables two distinct usage patterns with the same primitives: - -| Approach | Policy | Use Case | -|----------|--------|----------| -| **Tiered** | Accept all, optimize what we can | Library adoption, gradual migration | -| **Strict** | Reject non-affine at compile time | Performance-critical systems, embedded | - -Both approaches use the same `affine_awaitable` concept and protocol—the difference is in the policy enforced by `await_transform`: - -**Tiered mode** uses `if constexpr` to detect affine awaitables and fall back to `make_affine` for non-affine ones: - -```cpp -template -auto await_transform(Awaitable&& a) { - using A = std::remove_cvref_t; - if constexpr (affine_awaitable) { - return affine_awaiter{std::forward(a), &dispatcher_}; - } else { - return make_affine(std::forward(a), dispatcher_); - } -} -``` - -**Strict mode** uses `static_assert` to reject non-affine awaitables at compile time: - -```cpp -template A> -auto await_transform(A&& a) { - return affine_awaiter{std::forward(a), &dispatcher_}; -} - -template -auto await_transform(A&& a) { - static_assert(affine_awaitable, Dispatcher>, - "Only affine awaitables are accepted in strict mode"); - return 
affine_awaiter{std::forward(a), &dispatcher_}; -} -``` - -#### 6.1.3 Gradual Library Evolution - -The concept serves as a migration tool with three phases: - -**Phase 1: Status quo** -- Works everywhere -- 1 allocation fallback for non-affine awaitables via `make_affine` - -**Phase 2: Add affine overload** -- Non-breaking addition of `await_suspend(h, d)` to existing awaitables -- Zero-allocation for affine callers -- Legacy callers continue to work unchanged - -**Phase 3: Remove legacy overload** -- Compile-time enforcement via `static_assert` -- Zero-allocation guarantee across the entire chain -- Requires all awaitables in the chain to be affine - -This phased approach allows libraries to migrate incrementally without breaking existing code. - -#### 6.1.4 Example: Strict-Mode Task - -A strict-mode task rejects non-affine awaitables at compile time: - -```cpp -template -class strict_task { - struct promise_type { - any_dispatcher dispatcher_; - - template - auto await_transform(Awaitable&& a) { - using A = std::decay_t; - static_assert(affine_awaitable, - "strict_task only accepts affine awaitables"); - return transform_awaiter{ - std::forward(a), this}; - } - - // ... rest of promise implementation - }; - - // ... rest of task implementation -}; -``` - -This ensures that any attempt to `co_await` a non-affine awaitable results in a compile-time error, providing a hard guarantee of zero allocations. 
- -#### 6.1.5 Migration Tool, Not Just Detection - -The concept serves a dual purpose that should be explicitly recognized: - -**Detection (tiered mode)**: `if constexpr` selection -- Detects whether an awaitable is affine at compile time -- Falls back to `make_affine` for non-affine awaitables -- Enables gradual migration and library compatibility - -**Enforcement (strict mode)**: `static_assert` guarantee -- Rejects non-affine awaitables at compile time -- Provides hard zero-allocation guarantee -- Enables performance-critical and embedded use cases - -The same `affine_awaitable` concept enables both patterns—it's not just about detection, but about providing a migration path from today's ecosystem to zero-overhead scheduler affinity. - ---- - -## 7. Proposed Wording - -*Relative to N4981.* - -### 7.1 Header `` synopsis - -Add to the `` header synopsis: - -```cpp -namespace std { - // [coroutine.affine.dispatcher], dispatcher concept - template - concept dispatcher = see-below; - - // [coroutine.affine.concept], affine awaitable concept - template - concept affine_awaitable = see-below; -} -``` - -### 7.2 Dispatcher concept [coroutine.affine.dispatcher] - -```cpp -template -concept dispatcher = requires(D const& d, coroutine_handle
<>
h) { - { d(h) } -> std::convertible_to>; -}; -``` - -*Remarks:* The dispatcher must return a `std::coroutine_handle<>` (or convertible type) to enable symmetric transfer. Calling `d(h)` schedules `h` for resumption (typically on a specific execution context) and returns a coroutine handle that the caller may use for symmetric transfer. The dispatcher must be const-callable (logical constness), enabling thread-safe concurrent dispatch from multiple coroutines. Since `coroutine_handle
<>
` has `operator()` which invokes `resume()`, the handle itself is callable and can be dispatched directly. - -### 7.3 Affine awaitable concept [coroutine.affine.concept] - -```cpp -template -concept affine_awaitable = - dispatcher && - requires(A a, coroutine_handle
<>
h, D const& d) { - a.await_suspend(h, d); - }; -``` - -*Remarks:* This concept detects awaitables that participate in the affine awaitable protocol (§4) by providing the extended `await_suspend(h, d)` overload. The dispatcher is passed by const lvalue reference, and the awaitable must use it to resume the caller. The dispatcher returns a coroutine handle that `await_suspend` may return for symmetric transfer. - ---- - -## 8. Summary - -This proposal standardizes a minimal protocol for zero-overhead scheduler affinity in C++ coroutines: - -| Component | Purpose | -|-----------|---------| -| `dispatcher` | Concept for types callable with `coroutine_handle
<>
` | -| `affine_awaitable` | Concept detecting extended `await_suspend` protocol | - -**The protocol (§4):** Awaitables that accept a dispatcher in `await_suspend(h, d)` can resume through it, achieving zero-overhead scheduler affinity. Task types propagate the dispatcher through `co_await` chains, ensuring every coroutine resumes where it belongs. - -**The vision:** Every coroutine resumes where it belongs. No exceptions, no overhead, no rewrites. - ---- - -## Acknowledgements - -Thanks to Matheus Izvekov for feedback on reusable components and third-party coroutine extensibility. Thanks to Klemens Morgenstern for insights on real-world `await_transform` patterns and refining the dispatcher concept to accept coroutine handles directly. Thanks to Mateusz Pusz for detailed feedback on implementation patterns and P3552 integration. Thanks to the authors of P3552 (Dietmar Kühl, Maikel Nadolski) for establishing the foundation this proposal builds upon. - ---- - -## References - -### WG21 Papers - -- **[P2300R10]** Michał Dominiak, Georgy Evtushenko, Lewis Baker, Lucian Radu Teodorescu, Lee Howes, Kirk Shoop, Michael Garland, Eric Niebler, Bryce Adelstein Lelbach. *`std::execution`*. [https://wg21.link/P2300R10](https://wg21.link/P2300R10) - -- **[P3109R0]** Eric Niebler. *A plan for `std::execution` for C++26*. [https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3109r0.html](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3109r0.html) - -- **[P3552R3]** Dietmar Kühl, Maikel Nadolski. *Add a Coroutine Task Type*. [https://wg21.link/P3552](https://wg21.link/P3552) - -- **[P0981R0]** Gor Nishanov, Richard Smith. *Halo: Coroutine Heap Allocation eLision Optimization*. [https://wg21.link/P0981](https://wg21.link/P0981) - -- **[P2855R1]** Ville Voutilainen. *Member customization points for Senders and Receivers*. [https://isocpp.org/files/papers/P2855R1.html](https://isocpp.org/files/papers/P2855R1.html) - -- **[N4981]** Thomas Köppe. 
*Working Draft, Standard for Programming Language C++*. [https://wg21.link/N4981](https://wg21.link/N4981) - -### Implementations - -- **stdexec** — NVIDIA's reference implementation of P2300. - [https://github.com/NVIDIA/stdexec](https://github.com/NVIDIA/stdexec) - -- **libunifex** — Meta's implementation of the Unified Executors proposal. - [https://github.com/facebookexperimental/libunifex](https://github.com/facebookexperimental/libunifex) - -- **Intel Bare Metal Senders and Receivers** — Embedded adaptation demonstrating implementation challenges. - [https://github.com/intel/cpp-baremetal-senders-and-receivers](https://github.com/intel/cpp-baremetal-senders-and-receivers) - -### Existing Awaitable Ecosystem - -- **Boost.Asio** — Cross-platform C++ library for network and low-level I/O programming, with coroutine support since Boost 1.66 (2017). - [https://www.boost.org/doc/libs/release/doc/html/boost_asio.html](https://www.boost.org/doc/libs/release/doc/html/boost_asio.html) - -- **cppcoro** — Lewis Baker's coroutine library providing task types, generators, and async primitives. Note: This library is not actively maintained. - [https://github.com/lewissbaker/cppcoro](https://github.com/lewissbaker/cppcoro) - -- **libcoro** — Modern C++20 coroutine library. - [https://github.com/jbaldwin/libcoro](https://github.com/jbaldwin/libcoro) - -- **Bloomberg Quantum** — Production multi-threaded coroutine dispatcher. - [https://github.com/bloomberg/quantum](https://github.com/bloomberg/quantum) - -- **folly::coro** — Meta's coroutine library used in production at scale. - [https://github.com/facebook/folly/tree/main/folly/coro](https://github.com/facebook/folly/tree/main/folly/coro) - -### Background Reading - -- Corentin Jabot. *A Universal Async Abstraction for C++*. Discussion of executor models and design trade-offs. - [https://cor3ntin.github.io/posts/executors/](https://cor3ntin.github.io/posts/executors/) - -- Lewis Baker. 
*C++ Coroutines: Understanding operator co_await*. - [https://lewissbaker.github.io/2017/11/17/understanding-operator-co-await](https://lewissbaker.github.io/2017/11/17/understanding-operator-co-await) - -- Lewis Baker. *C++ Coroutines: Understanding the promise type*. - [https://lewissbaker.github.io/2018/09/05/understanding-the-promise-type](https://lewissbaker.github.io/2018/09/05/understanding-the-promise-type) - ---- - -## Revision History - -### R0 (2025-12-30) - -Initial revision. This paper supersedes the earlier "Simplifying P3552R3 with `make_affine`" draft, which focused narrowly on the trampoline fallback. This revision presents a complete framework: - -- Introduces the `affine_awaitable` concept for zero-overhead scheduler affinity -- Provides helpers to bridge standard coroutine machinery -- Positions affine awaitables as a foundation that P3552's `task` can build upon - -The key insight: passing the dispatcher to the awaitable eliminates the trampoline allocation entirely for opt-in awaitables, while maintaining full compatibility with existing code. - -### R1 (2025-12-31) - -Incorporated feedback and clarifications contributed by Logan McDougall: -- Added formal protocol link and clarified adapter direction in §4.3 -- Clarified tiered vs. 
strict policies and migration roles in §6 -- Simplified allocation table and references; documented `done_flag_` and reference link -- Noted library implementability and dispatcher extensibility - -### R2 (2025-12-31) - -Focused proposal on core protocol and concepts: -- Removed implementation details (mixins, trampoline, tiered/strict modes) from normative sections -- Inserted protocol specification verbatim from `affine-awaitable-protocol.md` (§4) -- Merged concepts into protocol section (§4.1, §4.2) -- Added non-normative section (§5) describing implementation examples with clear statement that mixins are not proposed -- Simplified proposed wording to only include concepts -- Maintained emotional arc and technical tone while narrowing scope - -### R3 (2026-01-03) - -Refinements based on implementation and feedback: -- Updated dispatcher concept to require return of `std::coroutine_handle<>` for symmetric transfer (§4.1, §7.2) -- Updated all examples and proposed wording to reflect dispatcher return value requirement -- Added implementation patterns section (§6.1) describing tiered vs. 
strict modes, non-breaking overload addition, and gradual migration path -- Restructured section 6: converted main benefit headings to bold format, renumbered implementation patterns subsection -- Consolidated helper type lists to single canonical reference, replaced other mentions with generic "helpers" -- Updated reference implementation: made `make_affine.hpp` self-contained, removed duplicate definitions from `affine_helpers.hpp` - -### R4 (2026-01-10) - -Aligned paper with reference implementation: -- Updated §4.4 point 3: legacy `await_suspend(caller)` path is now optional; strict-mode tasks may omit it -- Simplified §5: removed mixin examples, now points directly to reference implementation at corosio -- Updated all repository links to https://github.com/cppalliance/corosio -- Updated §6.1.4 example to use actual implementation patterns (`transform_awaiter`, `any_dispatcher`) -- Removed references to mixins (`affine_promise`, `affine_task`, `affine_awaiter`) throughout - - diff --git a/context/allocator.md b/context/allocator.md deleted file mode 100644 index 27da9a6..0000000 --- a/context/allocator.md +++ /dev/null @@ -1,31 +0,0 @@ -# Coroutine Frame Allocation Model - -## Problem Statement - -Control allocation of every coroutine frame in a coroutine chain, including internal coroutines created by the framework (e.g., `run_async_task` in `run_async`). The API must specify the frame allocator BEFORE the coroutine is created—once the coroutine call expression evaluates, it's too late. - -## Requirements - -The system must be: -- invisible to the user unless they ask -- give great performance by default -- give execution contexts a way to override the default -- give the user a way to force a specific allocator -- ergonomic to use - -## System Properties & Constraints - -1. **Allocation happens at call site** — Coroutine frame is allocated when the coroutine function is *called*, not when awaited -2. 
**Promise type controls allocation** — `operator new` overloads in the promise type are the only customization point -3. **Arguments are inspected** — The promise's `operator new` receives copies of the coroutine arguments -4. **Internal coroutines exist** — `run_async_task` (and potentially others) are implementation details -5. **Allocator must be specified BEFORE creation** — Once the coroutine call expression evaluates, it's too late - -## Proposed Solution - -An execution_context has a default frame allocator - -the `io_object` provides the default allocator from its execution_context - -`run_async` and `run_on` use the default allocator from the I/O object unless overridden, or no I/O object - diff --git a/context/asio-io-context.md b/context/asio-io-context.md deleted file mode 100644 index 44ae3aa..0000000 --- a/context/asio-io-context.md +++ /dev/null @@ -1,116 +0,0 @@ -## Asio Event Loop Architecture - -### Core Abstraction - -``` -io_context - └── scheduler (owns work queue, coordinates threads) - └── reactor (optional, lazy, handles I/O readiness) -``` - ---- - -### Platform Implementations - -**Linux (epoll)** -``` -io_context - └── scheduler - ├── op_queue_ (posted work) - └── epoll_reactor (service, lazy) - ├── epoll_fd_ - ├── eventfd_ (wake-up) - └── timer_queue_ -``` -- Reactor model: "fd is ready, now you call recv()" -- Scheduler and reactor are separate services -- `epoll_wait()` blocks, wakes on fd events or eventfd poke - -**macOS/BSD (kqueue)** -``` -io_context - └── scheduler - └── kqueue_reactor (service, lazy) - ├── kqueue_fd_ - └── timer_queue_ -``` -- Same reactor model as epoll -- kqueue handles more event types natively (timers, signals, vnodes) -- Otherwise identical architecture to Linux - -**Windows (IOCP)** -``` -io_context - └── win_iocp_io_context (scheduler + IOCP combined) - ├── iocp_ (HANDLE) - └── select_reactor (fallback, rare) -``` -- Proactor model: "OS completed the read, here's your data" -- No separate scheduler — IOCP 
*is* the work queue -- `PostQueuedCompletionStatus()` for post, `GetQueuedCompletionStatus()` for run -- `select_reactor` exists for edge cases only - ---- - -### Scheduler Responsibilities - -1. Own the work queue (`op_queue_`) -2. Track outstanding work for `run()` exit -3. Coordinate threads calling `run()` -4. Integrate reactor (or be the reactor on Windows) -5. Handle stop/restart - ---- - -### Reactor Responsibilities - -1. Wrap OS multiplexing API -2. Track fd → callback registrations -3. Block waiting for events -4. Enqueue completions back to scheduler -5. Provide wake-up mechanism for `post()` - ---- - -### Event Loop Comparison - -| | Linux | macOS | Windows | -|---|---|---|---| -| Mechanism | epoll | kqueue | IOCP | -| Model | Reactor | Reactor | Proactor | -| Scheduler | Separate class | Separate class | Combined | -| I/O syscall | After notification | After notification | Before notification | -| Wake-up | eventfd | kqueue user event | PostQueuedCompletionStatus | -| Timer handling | timerfd or internal queue | kqueue native | Internal queue | - ---- - -### Lifecycle - -```cpp -io_context ioc; // scheduler created (IOCP created on Windows) - // reactor NOT created yet on Linux/macOS - -tcp::socket s(ioc); // use_service() → epoll_create - -ioc.run(); // scheduler::run() → epoll_wait or GQCS -``` - ---- - -### run() Unified Model - -Regardless of platform, `run()` does: - -``` -while (!stopped) { - if (work in queue) - execute it - else if (reactor) - block in reactor, harvest completions - else - condvar wait for post() -} -``` - -The abstraction hides whether you're on a reactor (Linux/macOS) or proactor (Windows) — same `async_read_some()` call, different underlying mechanics. 
\ No newline at end of file diff --git a/context/coro-first-io.md b/context/coro-first-io.md deleted file mode 100644 index 68fcab2..0000000 --- a/context/coro-first-io.md +++ /dev/null @@ -1,1174 +0,0 @@ -| Document | D0000 | -|----------|-------| -| Date: | 2025-12-31 -| Reply-to: | Vinnie Falco \ -| Audience: | SG1, LEWG - ---- - -# Coroutine-First I/O: A Type-Erased Affine Framework - -## Abstract - -This paper asks: *if we design an asynchronous framework from the ground up with networking as the primary use case, what does the best possible API look like?* - -We present a coroutine-first I/O framework that achieves executor flexibility without sacrificing encapsulation. The *affine awaitable protocol* propagates execution context through coroutine chains without embedding executor types in public interfaces. Platform I/O types remain hidden in translation units, while composed algorithms expose only `task` return types. - -A central design choice: we consciously trade one pointer indirection per I/O operation for complete type hiding. This tradeoff—approximately 1-2 nanoseconds of overhead—eliminates the template complexity, compile time bloat, and ABI instability that have long plagued C++ async libraries. In contrast, `std::execution`'s `connect(sender, receiver)` returns a fully-typed `operation_state`, forcing implementation types through every API boundary. - -We compare our networking-first design against `std::execution` (P2300) and observe significant divergence. Analysis of P2300 and its evolution (P3826) reveals a framework driven by GPU and parallel workloads—P3826's focus is overwhelmingly GPU/parallel with no networking discussion. Core networking concerns—strand serialization, I/O completion contexts, platform integration—remain unaddressed. The query-based context model that P3826 attempts to fix is unnecessary when context propagates forward through coroutine machinery. 
- -Our framework demonstrates what emerges when networking requirements drive the design rather than being adapted to a GPU-focused abstraction. - ---- - -## 1. The Problem: Callback Hell Meets Template Hell - -Traditional asynchronous I/O frameworks face an uncomfortable choice. The callback model—templated on executor and completion handler—achieves zero-overhead abstraction but leaks implementation details into every public interface: - -```cpp -template -void async_read(Executor ex, Handler&& handler); -``` - -Every composed operation must propagate these template parameters, creating a cascade of instantiations. The executor type, the handler type, and often an allocator type all become part of the function signature. Users cannot hold heterogeneous operations in containers. Libraries cannot define stable ABIs. The "zero-cost abstraction" exacts its price in compile time, code size, and API complexity. - -Type erasure offers an escape, but the traditional approach—`std::function` for handlers, `any_executor` for executors—introduces heap allocations at every composition boundary. Early testing showed this penalty compounds catastrophically: a four-level operation using fully type-erased callbacks allocated hundreds of times per invocation and ran an order of magnitude slower than native templates. We quickly eliminated this configuration from further consideration—it is not competitive. - -We sought a third path: *what would an asynchronous I/O framework look like if we designed it from first principles for networking?* Not adapting an existing abstraction, not generalizing from parallel algorithms, but starting with sockets, buffers, strands, and completion ports as the primary concerns. - -The answer diverges significantly from `std::execution`. Where templates propagate through every interface, we use structural type erasure. 
Where algorithm customization requires domain transforms, we need none—networking has one implementation per platform; TCP servers don't run on GPUs. Section 2.5 explores these differences in detail. - ---- - -## 2. Motivation: Why Not the Networking TS? - -The Networking TS (and its progenitor Boost.Asio) is the de facto standard for asynchronous I/O in C++. It is mature, well-tested, and supports coroutines through completion tokens. Why build something new? - -### 2.1 The Template Tax - -The Networking TS design philosophy—zero-overhead abstraction through templates—incurs costs that compound in large codebases: - -**Every async operation signature includes executor and handler types:** -```cpp -template -void async_read(basic_socket& s, - MutableBufferSequence const& buffers, - ReadHandler&& handler); -``` - -**Composed operations propagate these types:** -```cpp -template -void async_http_request(basic_socket& sock, - http::request const& req, - Handler&& handler); -``` - -This creates: -- **N×M template instantiations** for N operations × M executor/handler combinations -- **Header-only dependencies** that must be recompiled on change -- **Binary size growth** that can reach megabytes in complex applications -- **Compile times measured in minutes** for moderate codebases -- **Nested move-construction overhead** at runtime—composed handlers like `read_op>>` must be moved at each abstraction layer, with costs that compound as nesting depth increases (see [Cost Analysis](https://github.com/cppalliance/wg21-papers/blob/master/coro-first-io/COST.md)) - -### 2.2 The Encapsulation Problem - -The Networking TS templates expose implementation details through public interfaces: - -```cpp -// User sees: -class http_client -{ -public: - template - void async_get(Executor ex, std::string url, Handler&& h); -}; -``` - -The user is forced to know: -- What executor types are valid -- What handler signatures are expected -- That the implementation uses sockets at all - 
-Platform types (`OVERLAPPED`, `io_uring_sqe`) do not appear in public API signatures—users write portable code. But they are not hidden from translation units. Asio is header-only; its internal operation types publicly inherit from `OVERLAPPED`; including `` pulls Windows headers into every compilation unit. The *API* is portable; the *compilation* is not. And the *structure* of the async machinery still leaks through every API boundary. - -**Our approach:** -```cpp -class http_client -{ -public: - task async_get(std::string url); // That's it. -}; -``` - -The implementation—sockets, buffers, executors—lives in the translation unit. The interface is stable. The ABI is stable. Compilation is fast. - -### 2.3 Coroutine-Compatible vs Coroutine-First - -The Networking TS added coroutine support through completion tokens like `use_awaitable`: - -```cpp -co_await async_read(socket, buffer, use_awaitable); -``` - -This adapts callback-based operations for coroutines. It works, but: -- **Double indirection**: Callback machinery wraps coroutine machinery -- **Executor handling is manual**: `co_spawn` and `bind_executor` required -- **Error handling diverges**: Exceptions vs `error_code` vs `expected` -- **Mental model mismatch**: Writing coroutines that think in callbacks - -Our design is **coroutine-first**: the suspension/resumption model is the foundation, not an adapter. Executor propagation is automatic. Type erasure is structural. The callback path (`dispatch().resume()`) is the compatibility layer, not the other way around. - -### 2.4 When to Use the Networking TS Instead - -The Networking TS remains the right choice when: -- You need callback-based APIs for C compatibility -- Template instantiation cost is acceptable -- You're already invested in the Asio ecosystem -- Maximum performance with zero abstraction is required -- Standardization timeline matters for your project - -**Caution:** The "zero abstraction" advantage erodes with composition depth. 
Our [Cost Analysis](https://github.com/cppalliance/wg21-papers/blob/master/coro-first-io/COST.md) shows that callbacks outperform coroutines at shallow nesting (1-2 levels), but the relationship inverts at deeper levels—by Level 4 (1000 operations), callbacks can be 3.4× slower than coroutines on Clang due to nested move-construction overhead. MSVC achieves near-parity (1.11×), while GCC shows intermediate results (1.77×). - -Our framework is better suited when: -- Coroutines are the primary programming model -- Public APIs must hide implementation details -- Compile time and binary size matter -- ABI stability is required across library boundaries -- Clean, simple interfaces are prioritized - -### 2.5 Why Not std::execution? - -The C++26 `std::execution` framework (P2300) provides sender/receiver abstractions for asynchronous programming. We analyzed both the base proposal and its evolution to understand how well it addresses networking. - -#### 2.5.1 P2300: Networking Mentioned, Not Addressed - -P2300 references networking and GPU/parallel topics in roughly equal measure. Superficially balanced. However, the *nature* of these references differs: - -| Aspect | GPU/Parallel | Networking | -|--------|--------------|------------| -| Concrete primitives | `bulk` algorithm, parallel scheduler | None — deferred to P2762 | -| Platform integration | CUDA mentioned by name | No IOCP, epoll, or io_uring | -| Serialization | N/A | **Zero strand mentions** | -| Status | Normative specification | Illustrative examples only | - -P2300 explicitly defers networking: - -> "Dietmar Kuehl has proposed networking APIs that use the sender/receiver abstraction (see P2762)." — P2300, Section 1.4.1.3 - -The framework provides composition primitives but **no I/O operations, no buffer management, and no strand serialization**. Networking exists as examples, not as design drivers. 
- -**What P2762 actually proposes:** - -P2762, the proposal P2300 defers to, explicitly chooses the backward query model as "most useful": - -> "Injecting the used scheduler from the point where the asynchronous work is actually used." - -Yet the paper immediately acknowledges the late-binding problem: - -> "When the scheduler is injected through the receiver the operation and used scheduler are brought together rather late, i.e., when `connect(sender, receiver)`ing and when the work graph is already built." - -P2762 also raises per-operation cancellation overhead as a concern—the same issue our socket-level approach solves—but offers no solution: - -> "Repeatedly doing that operation while processing data on a socket may be a performance concern... This area still needs some experimentation and, I think, design." - -P2762 leaves core networking features unspecified: - -| Feature | P2762 Status | -|---------|--------------| -| Scheduler interface | "It isn't yet clear how such an interface would actually look" | -| Strand serialization | Not mentioned | -| Buffer pools | "There is currently no design" | - -This confirms our thesis: networking is being adapted to `std::execution` rather than driving its design. The proposal acknowledges problems with the query model, identifies performance concerns we solve, yet leaves core networking abstractions unaddressed. - -#### 2.5.2 P3826: GPU-First Evolution - -P3826R2, "Fix or Remove Sender Algorithm Customization," reveals where `std::execution`'s complexity originates. The paper identifies a fundamental flaw: - -> "Many senders do not know where they will complete until they know where they will be started." - -The proposed fix adds `get_completion_domain` queries, `indeterminate_domain<>` types, and double-transform dispatching. This machinery exists to select GPU vs CPU algorithm implementations. - -**P3826 contains over 100 references to GPU, parallel execution, and hardware accelerators. 
It contains zero references to networking, sockets, or I/O patterns.** - -| Concern | GPU/Parallel (P3826's Focus) | Networking (Unaddressed) | -|---------|------------------------------|--------------------------| -| Algorithm dispatch | Critical — GPU vs CPU kernel | Irrelevant — one implementation per platform | -| Completion location | May differ from start | Same context or OS completion port | -| Domain customization | Required for acceleration | Unused overhead | - -Networking requires different things entirely: -- **Strand serialization**: Ordering guarantees, not algorithm selection -- **I/O completion contexts**: IOCP, epoll, io_uring integration -- **Executor propagation**: Ensuring handlers run on the correct thread -- **Buffer lifetime management**: Zero-copy patterns, scatter/gather - -None appear in P3826's analysis. - -#### 2.5.3 The Query Model Problem - -Both P2300 and P3826 share a design choice: **backward queries** for execution context. - -```cpp -// std::execution pattern: query sender/receiver for properties -auto sched = get_scheduler(get_env(rcvr)); -auto domain = get_domain(get_env(sndr)); - -// P3826 addition: tell sender where it starts -auto domain = get_completion_domain(get_env(sndr), get_env(rcvr)); -``` - -This requires senders to be self-describing—problematic when context is only known at the call site. P3826 attempts to fix this by passing hints, but the fix adds complexity rather than questioning the query model itself. - -#### 2.5.4 Forward Propagation Alternative - -Our coroutine model propagates context forward: - -```cpp -// Coroutine-first: context flows from caller to callee -run_async(my_executor, my_task()); // Executor injected at root -// await_transform propagates any_executor const& to all children — no queries needed -``` - -The executor is always known because it flows with control flow, not against it. No domain queries. No completion scheduler inference. No transform dispatching. 
- -#### 2.5.5 Observations - -Comparing our networking-first design with `std::execution` reveals significant divergence: - -1. **What we need, they don't have**: Strand serialization, platform I/O integration, buffer management -2. **What they have, we don't need**: Domain-based algorithm dispatch, completion domain queries, sender transforms -3. **Context flow**: They query backward; we inject forward - -The `std::execution` framework may eventually support networking, but its evolution is driven by GPU and parallel workloads. The complexity P3826 adds—to fix problems networking doesn't have—suggests that networking was not a primary design consideration. - -A framework designed from networking requirements produces a simpler, more direct solution. - -#### 2.5.6 The std::execution Tax - -If networking is required to integrate with `std::execution`, I/O libraries must pay a complexity tax: - -- **Query protocol compliance**: Implement `get_env`, `get_domain`, `get_completion_scheduler`—even if only to return defaults -- **Concept satisfaction**: Meet sender/receiver requirements designed for GPU algorithm dispatch -- **Transform machinery**: Domain transforms execute even when they select the only available implementation -- **API surface expansion**: Expose attributes and queries irrelevant to I/O operations -- **Type leakage**: `connect(sender, receiver)` returns a fully-typed `operation_state`, forcing implementation details through every API boundary - -The type leakage deserves emphasis. In the sender/receiver model: - -```cpp -// std::execution pattern -execution::sender auto snd = socket.async_read(buf); -execution::receiver auto rcv = /* ... */; -auto state = execution::connect(snd, rcv); // Type: connect_result_t -``` - -The `connect_result_t` type encodes the full operation state. 
Algorithms that compose senders must propagate these types: - -```cpp -// From P2300: operation state types leak into composed operations -template -struct _retry_op { - using _child_op_t = stdexec::connect_result_t>; - optional<_child_op_t> o_; // Nested operation state, fully typed -}; -``` - -This tax exists regardless of whether networking benefits from it. A socket that returns `default_domain` still participates in the dispatch protocol. The P3826 machinery runs, finds no customization, and falls through to the default—overhead for nothing. - -**The question is not whether P2300/P3826 break networking code.** They don't—defaults work. **The question is whether networking should pay for abstractions it doesn't use.** Our analysis suggests the cost is not justified when a simpler, networking-native design achieves the same goals. - -#### 2.5.7 Divergent Standardization Efforts - -WG21 faces a troubling pattern: std::execution continues evolving without addressing networking needs (§2.5.1-2.5.6), while separate proposals ([P3185](https://wg21.link/p3185), [P3482](https://wg21.link/p3482)) advocate replacing proven socket-based designs with the IETF TAPS framework—an abstract architecture with essentially no production deployment after a decade of development. - -Our analysis in [IETF TAPS versus Boost.Asio](https://github.com/cppalliance/wg21-papers/blob/master/ietf-taps-vs-asio.md) documents the risks of framework-first design. History favors proven practice: TCP/IP over OSI, BSD sockets over every proposed replacement, Boost.Asio's twenty years of field experience over speculative architecture. - -This paper demonstrates that a networking-first coroutine framework is both achievable and practical—without waiting for std::execution to address needs it wasn't designed for, and without adopting an untested abstraction model. - ---- - -## 3. 
The Executor Model - -*This section defines our executor abstraction—deliberately simpler than std::execution's scheduler model because networking needs dispatch and post, not algorithm customization.* - -**Terminology note:** We use the term *executor* rather than *scheduler* intentionally, to avoid conflating the two concepts. In `std::execution`, schedulers are designed for heterogeneous computing—selecting GPU vs CPU algorithms, managing completion domains, and dispatching to hardware accelerators. Networking has different needs: strand serialization, I/O completion contexts, and thread affinity. By using *executor* for our abstraction, we signal that this is a distinct concept tailored to networking's requirements, not an adaptation of the scheduler model. This terminology is also an homage to the quarter-century of pioneering work from Chris Kohlhoff, whose executor model in Boost.Asio established the foundation for modern C++ asynchronous I/O. - -C++20 coroutines provide type erasure *by construction*—but not through the handle type. `std::coroutine_handle` and `std::coroutine_handle` are both just pointers with identical overhead. The erasure that matters is *structural*: - -1. **The frame is opaque**: Callers see only a handle, not the promise's layout -2. **The return type is uniform**: All coroutines returning `task` have the same type, regardless of body -3. **Suspension points are hidden**: The caller doesn't know where the coroutine may suspend - -This structural erasure is often lamented as overhead, but we recognized it as opportunity: *the allocation we cannot avoid can pay for the type erasure we need*. - -A coroutine's promise can store execution context *by reference*, receiving it from the caller at suspension time rather than at construction time. This deferred binding enables a uniform `task` type that works with any executor, without encoding the executor type in its signature. 
- -We define an executor as any type satisfying the `executor` concept: - -```cpp -template -concept executor = std::derived_from; -``` - -An executor is simply a type that derives from `any_executor`. The base class defines the required interface: `dispatch()` for symmetric transfer and `post()` for deferred execution. Derived types inherit these methods and provide concrete implementations. - -### 3.1 Type-Erased Executor Base - -To store executors without encoding their type, we define `any_executor`—an abstract base class: - -```cpp -struct any_executor -{ - virtual ~any_executor() = default; - virtual coro dispatch(coro h) const = 0; - virtual void post(work* w) const = 0; -}; -``` - -Concrete executors (such as `io_context::executor`) derive from `any_executor` and implement the virtual methods. Coroutine promises store `any_executor const*`—a single pointer—to reference the caller's executor without encoding its concrete type. - -The abstract base class prevents slicing: pure virtual methods make `any_executor` non-instantiable, while derived executor types remain copyable for use at call sites. This design costs **one pointer** per stored executor reference, with virtual dispatch overhead equivalent to function pointer indirection. - -### 3.2 Executor Composition and Placement - -A subtle but important property: executor types are fully preserved at call sites even though they're type-erased internally. This enables zero-overhead composition at the API boundary while maintaining uniform internal representation. - -**Composable executor wrappers work naturally:** - -```cpp -template -struct strand { - Ex inner_; - - coro dispatch(coro h) const { - // Serialization logic, then delegate to inner - return inner_.dispatch(h); - } - // ... 
-}; - -// Full type information at the call site -strand s{pool.get_executor()}; -run_async(s, my_task()); // strand passed by value -``` - -When `run_async` stores the executor in the root task's frame, it preserves the complete type. The `strand` wrapper's serialization logic remains inlinable. Only when the executor propagates to child tasks—as `any_executor const*`—does type erasure occur. The erasure boundary is pushed to where it matters least: internal propagation rather than the hot dispatch path. - -**Why executor customization matters—even single-threaded:** - -Executors encode *policy*, not just parallelism. A single-threaded `io_context` still benefits from executor customization: - -- **Serialization**: `strand` ensures operations on one connection don't interleave, even when callbacks fire in arbitrary order -- **Deferred execution**: `post` vs `dispatch` controls stack depth and allows pending work to run before continuing -- **Instrumentation**: Wrapper executors can measure latency, count operations, or propagate tracing context -- **Prioritization**: Critical control messages can preempt bulk data transfers -- **Testing**: Manual executors enable deterministic unit tests without real async timing - -The `executor` concept and `any_executor` base class support all these patterns without baking policy into I/O object types. - -**Why the executor belongs at the call site, not on the I/O object:** - -- **The caller knows the concurrency requirements.** A multithreaded `io_context` may need strand-wrapped executors; a single-threaded context may not. The socket cannot know this—it only knows how to perform I/O. - -- **I/O objects become context-agnostic.** A socket works identically whether wrapped in a strand, run on a thread pool, or executed on a single-threaded context. The same socket type serves all deployment scenarios. - -- **Composition happens at the edge.** Users compose executors (`strand`, `priority_executor>`) at the point of use. 
The I/O object doesn't need N template parameters for N possible wrapper combinations. - -**The coroutine advantage:** - -Traditional callback-based designs often embed the executor in the I/O object's type, creating signatures like `basic_socket`. This leaks concurrency policy into type identity—two sockets with different executors have different types, complicating containers and APIs. - -Coroutines invert this relationship. The executor enters at `run_async` or `run_on`, propagates invisibly through `any_executor const&`, and reaches I/O operations via the affine awaitable protocol's `await_transform`. The I/O object participates in executor selection without encoding it in its type: - -```cpp -// Traditional: executor embedded in socket type -basic_socket> sock; - -// Coroutine model: executor supplied at launch -socket sock; // No executor type parameter -run_async(strand{pool.get_executor()}, use_socket(sock)); -``` - -The socket receives its executor indirectly—through the coroutine machinery—yet still benefits from the caller's choice of strand wrapping, thread pool, or any other executor composition. - -### 3.3 Executor Wrapping: A Trade-off - -For completeness, we acknowledge a case where callbacks are superior: executor wrappers that inject behavior around the posted work. - - - - - - - - - - - - - - -
Callbacks | Our Coroutine Model
- -```cpp -template -struct wrapper { - Ex ex_; - - template - void post(F&& f) { - struct impl { - decay_t f_; - void operator()() { - other_thing(); - f_(); - } - }; - ex_.post(impl{move(f)}); - } -}; -``` - - - -```cpp -template -struct wrapper { - Ex ex_; - - void post(work* w) { - struct impl : work { - work* inner_; - void operator()() { - other_thing(); - (*inner_)(); - } - }; - ex_.post(new impl{w}); - } -}; -``` - -
Cost: 0 allocations | Cost: 1 allocation
- -Callbacks compose without allocation because `impl` embeds `F` by value into existing operation state. Our model requires allocating a `work` wrapper to inject behavior, but preserves the capability that raw `coroutine_handle<>` loses entirely. - -Most practical executor wrappers—strands, thread pools, priority queues—do not need to wrap the work itself, only manage when and where it executes. For these, our model pays no penalty. The allocation cost appears only for exotic wrappers that must inject behavior around the work's execution. - ---- - -## 4. Platform I/O: Hiding the Machinery - -A central goal is encapsulation: platform-specific types (`OVERLAPPED`, `io_uring_sqe`, file descriptors) should not appear in public headers. We achieve this through *preallocated, type-erased operation state*. - -### 4.1 The Socket Abstraction - -```cpp -struct socket -{ - socket() : op_(new state) {} - - async_awaitable async_io(); - -private: - struct state : work - { - coro h_; - any_executor const* ex_; - // OVERLAPPED, HANDLE, etc. — hidden here - - void operator()() override - { - ex_->dispatch(h_)(); // Resume the returned handle - } - }; - - std::unique_ptr op_; -}; -``` - -The `state` structure: -1. Inherits from `work`, enabling intrusive queuing -2. Stores the coroutine handle and executor reference needed for completion -3. Contains platform-specific members (OVERLAPPED, handles) invisible to callers -4. Is allocated *once* at socket construction, not per-operation - -### 4.2 Intrusive Work Queue - -Submitted work uses an intrusive singly-linked list: - -```cpp -struct work -{ - virtual ~work() = default; - virtual void operator()() = 0; - work* next = nullptr; -}; -``` - -This design eliminates container allocations—each work item carries its own link. The `queue` class manages head and tail pointers with O(1) push and pop. Combined with the preallocated socket state, I/O operations require *zero allocations* in steady state. 
- -### 4.3 The Encapsulation Tradeoff - -We pay a cost for translation unit hiding: one level of indirection through the preallocated `state` pointer. This addresses the template tax described in §2.1. - -**The alternative: frame-embedded operation state** - -If I/O types were exposed in headers, operation state could live directly in the coroutine frame: - -```cpp -// Hypothetical: types exposed, state in frame -template -task async_read(Socket& s, buffer buf) { - typename Socket::read_op op{s, buf}; // State in coroutine frame - co_await op; - co_return op.bytes_transferred(); -} -``` - -This eliminates the indirection—the operation state is allocated as part of the coroutine frame, not separately. When the compiler knows the concrete types, it can potentially inline everything. This is the path `std::execution` takes with `connect_result_t` (see §2.5.6). - -**Why we chose encapsulation:** - -| Concern | Frame Embedding | Our Approach | -|---------|-----------------|--------------| -| Platform types in headers | `OVERLAPPED`, `io_uring_sqe` visible | Hidden in `.cpp` | -| ABI stability | Breaks on implementation change | Stable across versions | -| Compile time | Full template instantiation | Minimal header parsing | -| Binary size | N×M instantiations | One per operation | -| Refactoring cost | Recompile all users | Recompile one TU | - -The cost is **one pointer dereference per I/O operation**—typically 1-2 nanoseconds. Section 9 shows this is negligible for I/O-bound workloads. - ---- - -## 5. The Affine Awaitable Protocol - -The core innovation is how execution context flows through coroutine chains. We extend the standard awaitable protocol with an *affine* overload of `await_suspend` that returns a coroutine handle for symmetric transfer. - -> **Formal specification:** The complete affine awaitable protocol is specified in [Affine Awaitable Protocol](https://github.com/cppalliance/wg21-papers/blob/master/affine-awaitable-protocol.md). 
A detailed proposal for standardization, including integration with P2300 senders and migration strategies, is presented in [Affine Awaitables: Zero-Overhead Scheduler Affinity for the Rest of Us](https://github.com/cppalliance/wg21-papers/blob/master/affine-awaitables.md). - -```cpp -template -std::coroutine_handle<> await_suspend(coro h, Executor const& ex) const; -``` - -This two-argument form receives both the suspended coroutine handle and the caller's executor, and returns the next coroutine to resume. - -**Execution context note:** For asynchronous awaitables (where `await_ready()` returns `false`), the awaited operation completes in whatever context the underlying I/O system uses—typically an OS completion thread or reactor callback. The `await_resume()` member function executes in that completion context. The affine trampoline then dispatches *back* to the caller's scheduler before returning control, ensuring the caller resumes in the expected context. - -The caller's promise uses `await_transform` to inject the executor: - -```cpp -template -auto await_transform(Awaitable&& a) -{ - struct transform_awaiter - { - std::decay_t a_; - promise_type* p_; - - auto await_suspend(std::coroutine_handle h) - { - return a_.await_suspend(h, p_->ex_); // Inject executor, return handle - } - }; - return transform_awaiter{std::forward(a), this}; -} -``` - -This mechanism achieves implicit executor propagation: child coroutines inherit their parent's executor without explicit parameter passing. - -### 5.1 Symmetric Transfer - -A deliberate design choice: `await_suspend` returns `std::coroutine_handle<>` rather than `void`. When `await_suspend` returns a handle, the runtime resumes that coroutine *without growing the stack*—effectively a tail call. This prevents stack overflow in deep coroutine chains. - -Without symmetric transfer: -``` -A finishes → dispatch(B) → B finishes → dispatch(C) → ... 
- ↓ ↓ - stack grows stack grows more -``` - -With symmetric transfer: -``` -A finishes → return B → runtime resumes B → return C → runtime resumes C - (constant stack depth) -``` - -The executor's `dispatch` method also returns a handle: - -```cpp -coro dispatch(coro h) const -{ - return h; // Return handle for symmetric transfer -} -``` - -If the executor must post rather than dispatch (cross-thread), it returns `std::noop_coroutine()`. - -### 5.2 Sender/Receiver Compatibility - -The design is compatible with the coroutine task type now in the C++26 working draft (merged from P3552) and `std::execution`. The `dispatch()` method returns a `std::coroutine_handle<>` that can be used in two ways: - -- **Symmetric transfer** (coroutine context): Return the handle from `await_suspend` -- **Explicit resume** (sender context): Call `handle.resume()` directly - -```cpp -// In a sender's completion: -ex.dispatch(continuation).resume(); -``` - -If `dispatch()` posts work rather than dispatching inline, it returns `std::noop_coroutine()`. Calling `.resume()` on this is defined as a no-op, making the API safe in all contexts: - -```cpp -coro dispatch(coro h) const -{ - return h; // Return for symmetric transfer OR explicit resume -} - -// Coroutine caller: -return ex.dispatch(h); // Symmetric transfer - -// Sender/callback caller: -ex.dispatch(h).resume(); // Explicit resume, noop if posted -``` - -This means one executor interface serves both coroutines and senders with no conditional code paths. - -### 5.3 The Task Type - -The `task` type represents a lazy, composable coroutine: - -```cpp -struct task -{ - struct promise_type - { - any_executor const* ex_; // Where this task runs - any_executor const* caller_ex_; // Where to resume caller - coro continuation_; // Caller's coroutine handle - // ... - }; - - std::coroutine_handle h_; - bool has_own_ex_ = false; -}; -``` - -When awaited, a task: -1. Stores the caller's continuation and executor -2. 
Either inherits the caller's executor (default) or uses its own (if `set_executor` was called) -3. Returns the child's handle for symmetric transfer (or `noop_coroutine()` if posted) - -At completion (`final_suspend`), the task returns the continuation for symmetric transfer: - -```cpp -std::coroutine_handle<> await_suspend(coro h) const noexcept -{ - std::coroutine_handle<> next = std::noop_coroutine(); - if(p_->continuation_) - next = p_->caller_ex_->dispatch(p_->continuation_); - h.destroy(); - return next; -} -``` - ---- - -## 6. Executor Injection: Launch and Switch - -Coroutines need executors at two points: at the **root** (where does the whole operation run?) and **mid-chain** (how do I switch to a different executor for one sub-operation?). This section covers both primitives. - -### 6.1 Mid-Chain Switching: `run_on` - -Any coroutine can switch executors mid-operation using `run_on`: - -```cpp -template -task run_on(Executor const& ex, task t) -{ - t.set_executor(ex); - return t; -} - -// Usage: -co_await run_on(other_executor, some_operation()); -``` - -When awaited, a task with its own executor posts a starter work item to that executor rather than resuming inline. Upon completion, it dispatches back to the *caller's* executor—not its own—ensuring seamless return to the original context. - -This separation of `ex_` (where I run) and `caller_ex_` (where my caller resumes) enables crossing executor boundaries without explicit continuation management. 
- -**Practical use cases for mid-chain switching:** - -```cpp -// Offload CPU-intensive work to a thread pool -co_await run_on(cpu_pool, compute_heavy_task()); - -// Serialize access to shared state -co_await run_on(strand, protected_state_update()); - -// Instrument a specific subtree with tracing -co_await run_on(traced_executor, operation_to_measure()); - -// Prioritize urgent work -co_await run_on(high_priority_executor, send_control_message()); -``` - -These patterns work identically whether the underlying context is single-threaded or multi-threaded—the executor abstraction is about policy, not just parallelism. - -### 6.2 Root Ownership: `run_async` - -Top-level coroutines present a lifetime challenge: the executor must outlive all operations, but the coroutine owns only references. We solve this with a wrapper coroutine that *owns* the executor: - -```cpp -template -struct run_async_task -{ - struct starter : work { /* embedded in promise */ }; - - struct promise_type - { - Executor ex_; // Owned by value - starter start_; // Embedded—no allocation needed - // ... - }; - std::coroutine_handle h_; -}; - -template -void run_async(Executor ex, task t) -{ - auto root = wrapper(std::move(t)); - root.h_.promise().ex_ = std::move(ex); - - // Post embedded starter—avoids allocation since run_async_task is long-lived - root.h_.promise().start_.h_ = root.h_; - root.h_.promise().ex_.post(&root.h_.promise().start_); - root.release(); -} -``` - -The `run_async_task` frame lives on the heap and contains the executor by value. All child tasks receive `any_executor const&` referencing this frame-owned executor. The frame self-destructs at `final_suspend`, after all children have completed. - -This design provides two key benefits: **cheap type-erasure** (child tasks store only `any_executor const*`, not the concrete executor type) and **automatic lifetime management** (the executor lives exactly as long as the operation tree it serves). - ---- - -## 7. 
Frame Allocator Customization - -The default coroutine allocation strategy—one heap allocation per frame—is suboptimal for repeated operations. We introduce a *frame allocator protocol* that allows I/O objects to provide custom allocation strategies. - -### 7.1 The Frame Allocator Concepts - -```cpp -template -concept frame_allocator = requires(A& a, void* p, std::size_t n) { - { a.allocate(n) } -> std::convertible_to; - { a.deallocate(p, n) } -> std::same_as; -}; - -template -concept has_frame_allocator = requires(T& t) { - { t.get_frame_allocator() } -> frame_allocator; -}; -``` - -The `has_frame_allocator` concept allows I/O objects to opt-in explicitly by providing a `get_frame_allocator()` member function. - -### 7.2 Why I/O Objects? - -The frame allocator lives on I/O objects rather than the executor or root task—a Goldilocks choice that balances accessibility, lifetime, and efficiency: - -- **The executor is type-erased.** Our design propagates `any_executor const&` through coroutine chains. Type erasure means the executor cannot carry concrete allocator state without either templating on allocator type (defeating type erasure) or adding another indirection layer (adding overhead). - -- **The root task is too distant.** While the root task owns the executor by value, intermediate coroutines only see `any_executor const*`. Tunneling allocator access through every `co_await` would add indirection costs, and the allocation pattern wouldn't match the I/O pattern. - -- **I/O objects have the right lifetime.** A socket outlives its operations. Frames for `socket.async_read()` are allocated when the call begins and freed when it completes—while the socket remains alive. The allocator naturally follows the object initiating the operation. - -- **Locality of allocation sizes.** Operations on the same I/O object tend to produce similar frame sizes. A socket's read operations all generate roughly equal frames. 
Per-object pools achieve better cache locality and recycling efficiency than global pools shared by unrelated operations. - -- **C++20 machinery supports it naturally.** The `promise_type::operator new` overloads receive coroutine parameters directly. Detecting `has_frame_allocator` on the first or second parameter is zero-overhead—no runtime dispatch, no extra storage. The I/O object is already there as the natural first argument. - -- **A global fallback handles everything else.** Coroutines without I/O object parameters—lambdas, wrappers, pure computation—fall back to a global frame pool. Universal recycling without universal plumbing. - -### 7.3 Task Integration - -The `task::promise_type` overloads `operator new` to detect frame allocator providers: - -```cpp -struct promise_type -{ - // First parameter has get_frame_allocator() - template - static void* operator new(std::size_t size, First& first, Rest&...); - - // Second parameter (for member functions where this comes first) - template - static void* operator new(std::size_t size, First&, Second& second, Rest&...) - requires (!has_frame_allocator); - - // Default: no frame allocator - static void* operator new(std::size_t size); - - static void operator delete(void* ptr, std::size_t size); -}; -``` - -When a coroutine's first or second parameter satisfies `has_frame_allocator`, the frame is allocated from that object's allocator. Otherwise, the global heap is used. - -### 7.4 Allocation Tagging - -To enable unified deallocation, we prepend a header to each frame: - -```cpp -struct alloc_header -{ - void (*dealloc)(void* ctx, void* ptr, std::size_t size); - void* ctx; -}; -``` - -The header stores a deallocation function pointer and context. When `operator delete` is called, it reads the header to determine whether to use the custom allocator or the global heap. 
- -### 7.5 Thread-Local Frame Pool - -I/O objects implement `get_frame_allocator()` returning a pool with thread-local caching: - -```cpp -class frame_pool -{ - void* allocate(std::size_t n) - { - // 1. Try thread-local free list - // 2. Try global pool (mutex-protected) - // 3. Fall back to heap - } - - void deallocate(void* p, std::size_t) - { - // Return to thread-local free list - } -}; -``` - -After the first iteration, frames are recycled without syscalls. The global pool handles cross-thread returns safely. - ---- - -## 8. Allocation Analysis - -With recycling enabled for both models, we achieve zero steady-state allocations: - -| Operation | Callback (recycling) | Coroutine (pooled) | -|-----------|---------------------|-------------------| -| Level 1 (read_some) | 0 | 0 | -| Level 2 (read, 5×) | 0 | 0 | -| Level 3 (request, 10×) | 0 | 0 | -| Level 4 (session, 1000×) | 0 | 0 | - -### 8.1 Recycling Matters for Both Models - -A naive implementation of either model performs poorly. Without recycling: -- **Callbacks**: Each I/O operation allocates and deallocates operation state -- **Coroutines**: Each coroutine frame is heap-allocated and freed - -**Recycling is mandatory, not optional.** Early testing confirmed that non-recycling configurations perform so poorly—dominated by allocation overhead—that they are not worth benchmarking. The key optimization for *both* models is **thread-local recycling**: caching recently freed memory for immediate reuse by the next operation. All performance comparisons in this paper assume recycling is enabled. 
- -### 8.2 Callback Recycling - -For callbacks, we implement a single-block thread-local cache: - -```cpp -struct op_cache -{ - static void* allocate(std::size_t n) - { - auto& c = get(); - if(c.ptr_ && c.size_ >= n) - { - void* p = c.ptr_; - c.ptr_ = nullptr; - return p; - } - return ::operator new(n); - } - - static void deallocate(void* p, std::size_t n) - { - auto& c = get(); - if(!c.ptr_ || n >= c.size_) - { - ::operator delete(c.ptr_); - c.ptr_ = p; - c.size_ = n; - } - else - ::operator delete(p); - } -}; -``` - -The pattern is: **delete before dispatch**. When an I/O operation completes, it deallocates its state *before* invoking the completion handler. If that handler immediately starts another operation, the allocation finds the just-freed memory in the cache. - -### 8.3 Coroutine Frame Pooling - -For coroutines, we use a global frame pool that all coroutines share, regardless of whether they have explicit frame allocator parameters: - -```cpp -// Default operator new uses global pool -static void* operator new(std::size_t size) -{ - static frame_pool alloc(frame_pool::make_global()); - // ... allocate from pool -} -``` - -This ensures that *all* coroutines—including lambdas, wrappers, and tasks without I/O object parameters—benefit from frame recycling. The pool uses thread-local caching with a global overflow pool for cross-thread scenarios. - ---- - -## 9. Performance Comparison - -| ⚠️ **Benchmark Scope** | -|:--| -| These benchmarks measure **dispatch overhead only**—the cost of the async machinery itself, not actual I/O operations. No network or disk I/O is performed. Real I/O operations take 10,000–100,000+ ns; dispatch overhead (4–12,194 ns) is a small fraction of total operation time. Ratios like "3.4× faster" apply to dispatch cost, not end-to-end throughput. 
| - -### 9.1 Clang with Frame Elision - -Benchmarks compiled with Clang 20.1, `-O3`, Windows x64, with `[[clang::coro_await_elidable]]`: - -| Operation | Callback | Coroutine | Ratio | -|-----------|----------|-----------|-------| -| Level 1 (read_some) | 4 ns | 22 ns | 5.5× (cb faster) | -| Level 2 (read, 5×) | 24 ns | 37 ns | 1.5× (cb faster) | -| Level 3 (request, 10×) | 47 ns | 58 ns | 1.2× (cb faster) | -| Level 4 (session, 1000×) | 12194 ns | 3576 ns | **0.29× (co 3.4× faster)** | - -**Key observation:** At shallow abstraction levels, callbacks outperform coroutines. However, at deep abstraction levels (Level 4), callbacks become **3.4× slower** than coroutines due to nested move constructor chains that Clang's optimizer cannot eliminate. - -### 9.2 MSVC Comparison - -The same benchmarks compiled with MSVC 19.x, RelWithDebInfo, Windows x64: - -| Operation | Callback | Coroutine | Ratio | -|-----------|----------|-----------|-------| -| Level 1 (read_some) | 5 ns | 26 ns | 5.2× (cb faster) | -| Level 2 (read, 5×) | 32 ns | 58 ns | 1.8× (cb faster) | -| Level 3 (request, 10×) | 66 ns | 92 ns | 1.4× (cb faster) | -| Level 4 (session, 1000×) | 7247 ns | 6536 ns | **0.90× (co slightly faster)** | - -**Key observation:** MSVC's optimizer successfully eliminates the callback abstraction penalty that Clang cannot. At Level 4, callbacks and coroutines perform nearly equally (1.11× ratio), demonstrating that the callback slowdown on Clang is compiler-specific rather than inherent to the model. 
- -### 9.3 Analysis - -**Performance is compiler-dependent.** The callback/coroutine ratio varies dramatically: - -| Depth | Clang Ratio | MSVC Ratio | GCC Ratio | -|-------|-------------|------------|-----------| -| Level 1 (1 op) | 5.5× (cb faster) | 5.2× (cb faster) | 3.4× (cb faster) | -| Level 2 (5 ops) | 1.5× (cb faster) | 1.8× (cb faster) | 1.1× (cb faster) | -| Level 3 (10 ops) | 1.2× (cb faster) | 1.4× (cb faster) | 0.76× (co faster) | -| Level 4 (1000 ops) | 0.29× (co faster) | 0.90× (co faster) | 0.57× (co faster) | - -At shallow levels, callbacks win. At deep levels, coroutines win on Clang (dramatically) and GCC (modestly); MSVC achieves near-parity. Section 10.2 explains why: nested move constructor chains that some compilers optimize away and others don't. - -### 9.4 GCC Comparison - -Benchmarks compiled with GCC 15.2, `-O3`, Windows x64: - -| Operation | Callback | Coroutine | Ratio | -|-----------|----------|-----------|-------| -| Level 1 (read_some) | 5 ns | 17 ns | 3.4× (cb faster) | -| Level 2 (read, 5×) | 32 ns | 35 ns | 1.1× (cb faster) | -| Level 3 (request, 10×) | 66 ns | 50 ns | 0.76× (co faster) | -| Level 4 (session, 1000×) | 7774 ns | 4405 ns | **0.57× (co 1.77× faster)** | - -**Key observations:** GCC shows an intermediate optimization profile between Clang and MSVC: -- **Better than Clang**: GCC achieves callback performance closer to coroutines (1.77× ratio vs Clang's 3.4×) -- **Worse than MSVC**: GCC doesn't achieve MSVC's near-parity (1.77× vs MSVC's 1.11×) -- **Callbacks excel at shallow levels**: GCC's callback implementation is faster than coroutines for levels 1-2 -- **Coroutines faster at deep levels**: At Level 4, coroutines are 1.77× faster than callbacks - -GCC optimizes shallow callback chains well but shows some penalty at deeper abstraction levels, falling between Clang's conservative approach and MSVC's aggressive optimization. 
- -### 9.5 Real-World Context - -For I/O-bound workloads: -- Network RTT: 100,000+ ns -- Disk access: 10,000+ ns -- Dispatch overhead: 4–12,194 ns (compiler and depth dependent) - -Even the largest dispatch overhead (~12 µs for 1000 nested callbacks on Clang) is ~12% of a typical network round-trip. For most applications, the choice between callbacks and coroutines should be driven by code clarity and maintainability rather than dispatch microbenchmarks. - ---- - -## 10. The Unavoidable Cost: `resume()` Opacity - -Coroutine performance is inherently limited by the opacity of `std::coroutine_handle<>::resume()`. The compiler cannot inline across resume boundaries because: - -1. The coroutine may be suspended at any of multiple `co_await` points -2. The frame address is only known at runtime -3. Resume effectively performs an indirect jump through the frame's resumption pointer - -Note: This overhead is unrelated to handle typing. Whether you hold `coroutine_handle` or `coroutine_handle`, the resume operation is identical—an indirect jump through the frame's resumption pointer. The opacity is intrinsic to the coroutine model, not an artifact of type erasure. - -This prevents optimizations that callbacks enable: register allocation across async boundaries, constant propagation through handlers, and dead code elimination of unused paths. - -### 10.1 HALO and Coroutine Elision - -HALO (Heap Allocation eLision Optimization) can theoretically inline coroutine frames when the compiler can prove: -1. The coroutine is immediately awaited -2. The frame doesn't escape the caller's lifetime -3. The coroutine's lifetime is bounded by the caller - -**Important limitation:** While HALO is well-documented for synchronous coroutines like generators, current compilers do not reliably apply HALO to asynchronous awaitables where `await_ready()` returns `false`. 
When a coroutine truly suspends (waiting for I/O, timers, etc.), the frame must persist across the suspension point, defeating standard HALO analysis. - -For our design—where tasks store handles and pass through executors—the frame "escapes" into the task object, further limiting standard HALO applicability. - -However, **Clang provides `[[clang::coro_await_elidable]]`**, an attribute that enables more aggressive frame elision for async tasks: - -```cpp -#ifdef __clang__ -#define CORO_AWAIT_ELIDABLE [[clang::coro_await_elidable]] -#else -#define CORO_AWAIT_ELIDABLE -#endif - -struct CORO_AWAIT_ELIDABLE task { /* ... */ }; -``` - -With this attribute, Clang can elide nested coroutine frames into the parent's frame when directly awaited, providing significant performance improvements for nested coroutine calls. - -This optimization is Clang-specific. MSVC and GCC do not currently support coroutine await elision, which contributes to their slower coroutine performance relative to Clang. - -### 10.2 Compiler Differences - -| Feature | Clang 20.x | MSVC 19.x | GCC 15.x | -|---------|-----------|-----------|----------| -| `[[coro_await_elidable]]` | ✓ | ✗ | ✗ | -| Frame elision (HALO) | Aggressive | Conservative | Moderate | -| Symmetric transfer | Optimized | Less optimized | Moderate | -| Coroutine speed (Level 4) | Fastest (3576 ns) | Moderate (6536 ns) | Fast (4405 ns) | -| Callback optimization | Conservative | Aggressive | Moderate | -| Callback speed (Level 4) | Slow (12194 ns) | Fast (7247 ns) | Moderate (7774 ns) | -| Callback/coroutine ratio (Level 4) | 3.4× (co faster) | 1.11× (near parity) | 1.77× (co faster) | - -**Callback performance differences:** - -- **Clang**: Conservative optimizer cannot eliminate nested move constructor chains (~280 bytes moved per I/O). Callbacks become 3.4× slower than coroutines at deep abstraction levels due to template type growth and temporary object churn. 
- -- **MSVC**: Aggressive optimizer inlines deeply nested move constructors through 3-4 template layers, eliminates stack temporaries, and optimizes across template instantiations. Achieves near-parity between callbacks and coroutines (1.11× ratio). - -- **GCC**: Intermediate optimization profile. Better than Clang at eliminating callback overhead (1.77× ratio vs Clang's 3.4×) but doesn't achieve MSVC's near-parity. Callbacks excel at shallow levels; coroutines faster at deep levels. - -For performance-critical coroutine code, Clang currently provides superior optimization. However, callback performance is highly compiler-dependent—MSVC demonstrates that aggressive optimization can eliminate the callback abstraction penalty entirely. - -**A note on template coroutines:** - -During development, we encountered a case where MSVC could not compile templated coroutines that use symmetric transfer: - -```cpp -// Works on MSVC -inline task async_op(socket& s) { co_await s.async_io(); } - -// Sometimes fails on MSVC with error C4737 -template -task async_op(Stream& s) { co_await s.async_io(); } -``` - -The compiler emits error C4737 ("Unable to perform required tail call. Performance may be degraded") and treats it as a hard error, even though the code is valid C++20 and compiles successfully on Clang. The error originates in the code generator rather than the front-end, so `#pragma warning(disable: 4737)` has no effect. - -This appears to be a [known issue from 2021](https://developercommunity.visualstudio.com/t/Coroutine-compilation-resulting-in-erro/1510427) that sometimes affects template instantiations involving symmetric transfer. Our non-template coroutines work correctly on both compilers, but users attempting to write generic coroutine adapters or stream wrappers should be aware of this limitation. - -This raises a broader concern: C++20 coroutines are now five years post-standardization. 
If a major compiler still sometimes fails to compile valid coroutine code, it suggests that implementations may struggle to keep pace with language complexity. Features that outpace compiler support create fragmentation—code that works on one toolchain fails on another, forcing developers to choose between expressiveness and portability. - -### 10.3 Implemented Mitigations - -1. **Frame pooling** (Section 7): Custom `operator new/delete` with thread-local caching eliminates allocation overhead after warmup -2. **`[[clang::coro_await_elidable]]`**: Enables frame elision for nested coroutines on Clang -3. **Symmetric transfer** (Section 5.1): Returning handles from `await_suspend` prevents stack growth -4. **Preallocated I/O state** (Section 4.1): Socket operation state is allocated once, not per-operation -5. **Global frame pool fallback**: Coroutines without explicit frame allocator parameters still benefit from pooling - ---- - -## 11. Design Trade-offs - -| Aspect | Callback Model | Coroutine Model | -|--------|---------------|-----------------| -| API Complexity | High (templates everywhere) | Low (uniform `task` type) | -| Compile Time | Poor (deep template instantiation) | Good (type erasure at boundaries) | -| Runtime (shallow, Clang) | Excellent (~4 ns) | Moderate (~22 ns) | -| Runtime (deep, Clang) | ~12194 ns for 1000 ops | ~3576 ns for 1000 ops (**co 3.4× faster**) | -| Runtime (deep, MSVC) | ~7247 ns for 1000 ops | ~6536 ns for 1000 ops (**near parity**) | -| Runtime (deep, GCC) | ~7774 ns for 1000 ops | ~4405 ns for 1000 ops (**co 1.77× faster**) | -| Allocations (optimized) | 0 (recycling allocator) | 0 (frame pooling) | -| Handle overhead | N/A | Zero (`handle` = `handle`) | -| Sender compatibility | N/A | Native (`dispatch().resume()` pattern) | -| Code Readability | Callback chains / state machines | Linear `co_await` sequences | -| Debugging | Stack traces fragmented | Stack traces fragmented | -| Encapsulation | Poor (leaky templates) | Excellent 
(hidden state) | -| Customization | Explicit allocator parameters | Trait-based detection | -| Compiler optimization | **Highly compiler-dependent** | Clang >> MSVC ≈ GCC | -| I/O state location | Per-operation or preallocated | Preallocated (one indirection) | -| ABI stability | Types leak through templates | Stable (platform types hidden) | -| Nested move cost | ~280 bytes moved per I/O (Clang) | ~24 bytes assigned per I/O | - ---- - -## 12. Conclusion - -We have demonstrated a coroutine-first asynchronous I/O framework that achieves: - -1. **Clean public interfaces**: No templates, no allocators, just `task` -2. **Hidden platform types**: I/O state lives in translation units -3. **Flexible executors**: Any type satisfying `executor` works; composition happens at call sites -4. **Zero steady-state allocations**: Frame pooling and recycling eliminate allocation overhead -5. **Conscious encapsulation tradeoff**: One pointer indirection buys ABI stability, fast compilation, and readable interfaces - -The affine awaitable protocol—injecting execution context through `await_transform`—provides the mechanism. Dispatch overhead is compiler-dependent (§10.2) but negligible compared to real I/O latency in all cases. - -The comparison with `std::execution` (§2.5) is instructive: that framework's complexity serves GPU workloads, not networking. Our design sidesteps those issues entirely—context flows forward, and algorithm customization is unnecessary because TCP servers don't run on GPUs. - -This divergence suggests that **networking deserves first-class design consideration**, not adaptation to frameworks optimized for GPU workloads. The future of asynchronous C++ need not be a single universal abstraction—it may be purpose-built frameworks that excel at their primary use cases while remaining interoperable at the boundaries. - ---- - -## References - -1. [N4775](https://wg21.link/n4775) — C++ Extensions for Coroutines (Gor Nishanov) -2. 
[P0443R14](https://wg21.link/p0443r14) — A Unified Executors Proposal for C++ (Jared Hoberock, Michael Garland, Chris Kohlhoff, et al.) -3. [P2300R10](https://wg21.link/p2300) — std::execution (Michał Dominiak, Georgy Evtushenko, Lewis Baker, Lucian Radu Teodorescu, Lee Howes, Kirk Shoop, Eric Niebler) -4. [P2762R2](https://wg21.link/p2762) — Sender/Receiver Interface for Networking (Dietmar Kühl) -5. [P3552R3](https://wg21.link/p3552) — Add a Coroutine Task Type (Dietmar Kühl, Maikel Nadolski) — *Now merged into C++26 working draft* -6. [P3826R2](https://wg21.link/p3826) — Fix or Remove Sender Algorithm Customization (Lewis Baker, Eric Niebler) -7. [Affine Awaitables](https://github.com/cppalliance/wg21-papers/blob/master/affine-awaitables.md) — Zero-Overhead Scheduler Affinity for the Rest of Us (Vinnie Falco) -8. [Affine Awaitable Protocol](https://github.com/cppalliance/wg21-papers/blob/master/affine-awaitable-protocol.md) — Formal protocol specification (Vinnie Falco) -9. [Boost.Asio](https://www.boost.org/doc/libs/release/doc/html/boost_asio.html) — Asynchronous I/O library (Chris Kohlhoff) -10. [Asymmetric Transfer](https://lewissbaker.github.io/) — Blog series on C++ coroutines (Lewis Baker) -11. [Cost Analysis](https://github.com/cppalliance/wg21-papers/blob/master/coro-first-io/COST.md) — Detailed analysis of callback vs coroutine performance and the nested move-construction problem -12. [Benchmark Source](https://github.com/cppalliance/wg21-papers/blob/master/coro-first-io/bench.cpp) — Source code for performance measurements -13. [P3185](https://wg21.link/p3185) — A proposed direction for C++ Standard Networking based on IETF TAPS (Thomas Rodgers) -14. [P3482](https://wg21.link/p3482) — Design for C++ networking based on IETF TAPS (Thomas Rodgers, Dietmar Kühl) -15. 
[IETF TAPS versus Boost.Asio](https://github.com/cppalliance/wg21-papers/blob/master/ietf-taps-vs-asio.md) — Analysis of framework-first vs use-case-first design approaches - ---- - -## Acknowledgements - -This paper builds on the foundational work of many contributors to C++ asynchronous programming: - -**Dietmar Kühl** for P2762 and P3552, which explore sender/receiver networking and coroutine task types respectively. His clear articulation of design tradeoffs in P2762—including the late-binding problem and cancellation overhead concerns—helped crystallize our understanding of where the sender model introduces friction for networking. - -**Lewis Baker** for his pioneering work on C++ coroutines, the Asymmetric Transfer blog series, and his contributions to P2300 and P3826. His explanations of symmetric transfer and coroutine optimization techniques directly informed our design. - -**Eric Niebler** and the P2300 authors for developing the sender/receiver abstraction. While we argue that networking benefits from a different approach, the rigor of their work provided a clear baseline for comparison. - -**Chris Kohlhoff** for Boost.Asio, which has served the C++ community for nearly two decades and established many of the patterns we build upon—and some we consciously depart from. - -The analysis in this paper is not a critique of these authors' contributions, but rather an exploration of whether networking's specific requirements are best served by adapting to general-purpose abstractions or by purpose-built designs. We hope this work contributes constructively to that ongoing discussion. 
diff --git a/context/corosio-design.md b/context/corosio-design.md deleted file mode 100644 index 89226bb..0000000 --- a/context/corosio-design.md +++ /dev/null @@ -1,161 +0,0 @@ -# Coroutine-First I/O Execution Model - - - -## System Properties - -Optimized for coroutines-first, other models secondary - -**HALO will improve:** Clang is best at HALO, and we should assume other compilers will eventually get there - -## Definitions - -**Definition:** An `io_object` reflects operating-system level asynchronous operation which completes through a platform-specific reactor - -**Requirement:** A coroutine signals execution affinity by participating in the affine awaitable protocol - -**Requirement:** An `io_object` resumes the calling coroutine by using its dispatcher, provided during await_suspend - -**Implication:** Because the `io_object` always resumes through the dispatcher, a coroutine at `final_suspend` is guaranteed to be executing on its own executor's context. When returning to a caller with the same executor (`caller_ex_ == ex_`), symmetric transfer can proceed without a `running_in_this_thread()` check—pointer equality is sufficient. When executors differ, `final_suspend` must dispatch through the caller's executor. - - - -## Executor Operations - -An executor is to coroutines what an allocator is to memory. An executor encapsulates rules for where, when, and how a coroutine resumes. Lightweight, copyable handle to an execution context (thread pool, strand, system threads, etc.). There are three basic operations: - -- `dispatch`: Run inline if allowed, else queue. Cheapest path. Use when crossing execution context boundaries (e.g., I/O thread completing an operation, resuming a coroutine on the user's executor). Runs inline if executor rules permit, otherwise queues. - -- `post`: Always queue, never inline. Use when you need guaranteed asynchrony — resumption must not happen inline. Rarely needed for coroutines. 
- -- `defer`: Always queue, but hints "this is my continuation" — enables tail-call/thread-local optimizations. Use when resuming a coroutine from within the same executor context and you must go through the executor (e.g., strand enforcement, different associated executors in chain). Optimizes as continuation via thread-local queue. - -- **symmetric transfer**: Prefer over all three when caller and callee share the same executor and no strand/ordering constraints apply. Direct tail call, zero overhead, no executor involvement. - -In a pure coroutine model, symmetric transfer handles continuation chaining directly — the compiler generates tail calls between frames, no executor involvement. `defer` is an executor-based optimization that becomes redundant when you can just return the next coroutine handle. - -### Decision order: - -1. Same executor, no constraints → symmetric transfer -2. Crossing context boundary → dispatch -3. Same executor, must use executor (strand, etc.) → defer -4. Need guaranteed async → post - - - -## Flow Diagrams - -A _flow diagram_ signifies a composed asynchronous call chain reified as a series of co_awaits. - -* Coroutines are lazy and indicated by `c`, `c1`, `c2`, ... -* `io_object` are represented as `io`, `io1`, `io2`, ... -* Foreign awaitable contexts are represented as `f`, `f1`, `f2`, ... -* `co_await` leading to an `io_object` are represented by arrows `->` -* `co_await` to a foreign context are represented by `\` -* Executors are annotated `ex`, `ex1`, `ex2`, ... 
-* A coroutine with _executor affinity_ is preceded by `!` - - - -This call chain: -``` -c -> io -``` -represents (ordinals are left out when singular): -``` -task c(io_object& io) { co_await io.op(); } -``` - - -This call chain: -``` -c1 -> c2 -> io -``` -represents: -``` -task c1(io_object& io) { co_await c2(io); } -task c2(io_object& io) { co_await io.op(); } -``` - - - -A coroutine can await a foreign context, to send work elsewhere: -``` -c1 -> c2 -> io - \ - f -``` -for example a CPU-bound task to not block the io thread: -``` -task c1(io_object& io) { co_await c2(io); } -task c2(io_object& io) { co_await f(); co_await io.op(); } -``` - - - -A coroutine preceded by an exclamation point in a flow diagram has _executor affinity_, or just _affinity_. It is a lazy coroutine, and its call to resume is dispatched through `ex`. The execution model offers an invariant: the later call to resume `c` happens through the executor `ex`, obtained through `co_await io.op()` (affine awaitable protocol). -``` -!c1 -> io -``` -achieved by (`run_on` is notional) -``` -task c1(io_object&); -//... -run_on( ex, c1(io) ); -``` -Affinity propagates forward through the affine awaitable protocol. The initial coroutine in the chain captures the typed executor by value, and propagates it by reference forward. A coroutine created in this fashion captures the type-erased affine dispatcher and propagates it forward. This continues to the `io_object` where it is stored and used later when the operation completes. 
- -Subsequent coroutines in a flow diagram representing I/O execution -have inherited affinity unless annotated with an exclamation point indicating that -their affinity has changed: -``` -!c1 -> c2 -> !c3 -> io -``` -represents -``` -task c1(io_object& io) { co_await c2(io); } -task c2(io_object& io) { co_await run_on( ex2, c3(io) ); } -task c3(io_object& io) { co_await io.op(); } -``` -The `run_on` function is notional, representing an awaitable means of awaiting a new coroutine on the same `io_object`, on a different executor `ex2`. What must happen here is the following: -- `c1` is launched on `ex1` (implied) -- `c2` continues in `ex1` implicitly -- `c3` is launched on `ex2` explicitly -- `io` captures the type-erased `ex2` -- When the I/O completes, `c3` is resumed through `ex2` -- When `c3` returns, `c2` is resumed through `ex1` -- When `c2` returns, its dispatcher compares equal to `c1`'s dispatcher and the transfer is symmetric; `ex1` is not invoked - - - - - -## Allocation Model - -C++20 coroutines support frame allocation customization through operator new and operator delete in the promise type. The call: -``` -co_await f(); -``` -Calls the simple `operator new` and `operator delete`: -``` -struct promise_type { - void* operator new(std::size_t size) { return ::operator new(size); } - void operator delete(void* ptr, std::size_t size) { ::operator delete(ptr, size); } -}; -``` - -The coroutine's arguments can be used to overload these operators. 
The call: -``` -co_await f(std::allocator_arg, alloc, x); -``` -Invokes allocator-aware `operator new`, where `std::allocator_arg_t` is the conventional detection tag: -``` -struct promise_type { - template<class Allocator, class... Args> - void* operator new(std::size_t size, std::allocator_arg_t, Allocator alloc, Args&&...); - - template<class Allocator, class... Args> - void operator delete(void* ptr, std::size_t size, std::allocator_arg_t, Allocator, Args&&...); -}; -``` -Allocator-aware `operator new` stores the allocator in the frame (appended after `size` bytes); matching `operator delete` recovers it for deallocation. If allocation fails and `get_return_object_on_allocation_failure()` exists in the promise, it is called instead of throwing. Compilers may elide frame allocation entirely (HALO) when coroutine lifetime is provably bounded by the caller. diff --git a/context/execution-io-first.md b/context/execution-io-first.md deleted file mode 100644 index 963fd10..0000000 --- a/context/execution-io-first.md +++ /dev/null @@ -1,838 +0,0 @@ -| Document | D0000 | -|----------|-------| -| Date: | 2025-01-05 -| Reply-to: | Vinnie Falco \ -| Audience: | SG1, LEWG - ---- - -# A Coroutines-First I/O Execution Model - -## Abstract - -This paper asks: *what would an execution model look like if designed from the ground up for coroutine-driven asynchronous I/O?* - -An execution model answers how asynchronous work is scheduled and dispatched, where operation state is allocated and by whom, and what customization points allow users to adapt the framework's behavior. We propose a **coroutines-first** execution model optimized for CPU-bound I/O workloads. The framework comprises: a minimal executor abstraction built on `dispatch` and `post`; the **affine awaitable protocol** for zero-overhead scheduler affinity; dispatcher propagation through coroutine chains; and allocator customization via I/O object arguments.
- -Central to our design is a clear responsibility model: **the executor decides allocation policy**, while **the I/O object owns its executor**. The I/O object becomes a unified carrier—owning the dispatcher binding and carrying the allocator reference through composed operation chains. Both resources propagate forward through I/O object arguments, addressing timing constraints that backward query models cannot satisfy. The result is a simpler abstraction: two operations, clear ownership semantics, forward context propagation, and direct mapping to what I/O completion paths actually do. - -An explicit tradeoff: we consciously trade one pointer indirection per I/O operation (~1-2 nanoseconds) for complete type hiding. Executor types do not leak into public interfaces; platform I/O types remain hidden in translation units; composed algorithms expose only `task` return types. This type hiding directly enables ABI stability—library implementations can evolve without breaking user binaries. It also eliminates the template complexity and compile time bloat that have long plagued C++ async libraries. In contrast, `std::execution`'s `connect(sender, receiver)` returns a fully-typed `operation_state`, forcing implementation types through every API boundary. - ---- - -## 1. Introduction - -The C++ standardization effort has produced `std::execution` (P2300), a sender/receiver framework for asynchronous programming. Its design accommodates heterogeneous computing—GPU kernels, parallel algorithms, and hardware accelerators. The machinery is substantial: domains select algorithm implementations, queries determine completion schedulers, and transforms dispatch work to appropriate backends. - -But what does asynchronous I/O actually need? - -A network server completes read operations on I/O threads, resumes handlers on application threads, and serializes access to connection state. 
A file system service batches completions through io_uring, dispatches callbacks to worker pools, and manages buffer lifetimes across suspension points. These patterns repeat across every I/O-intensive application. None of them require domain-based algorithm dispatch. TCP servers do not run on GPUs. Socket reads have one implementation per platform, not a menu of hardware backends. - -**This paper explores what emerges when I/O requirements drive the design.** - -We find that two operations suffice: **dispatch** for continuations and **post** for independent work. The choice between them is correctness, not performance. Using dispatch while holding a mutex invites deadlock. Using post for every continuation wastes cycles bouncing through queues. But primitives alone don't solve the composition problem. In a chain like `async_http_request → async_read → socket`, who decides which allocator to use? Who determines which executor runs the completion? The answers are not symmetric: - -- **The executor decides allocation policy.** Memory strategy—recycling pools, bounded allocation, per-tenant budgets—is a property of the execution context, not the I/O operation. A socket doesn't care about memory policy; the context in which it runs does. - -- **The I/O object owns its executor.** A socket registered with epoll on thread A cannot have completions delivered to thread B's epoll. The physical coupling is inherent. The call site cannot know which event loop the socket uses—only the socket knows. - -This responsibility model makes the I/O object a **unified carrier**: it owns the executor binding and carries the allocator reference (provided by the executor at construction). Every coroutine in the chain receives both resources through the I/O object argument. The timing constraints matter. Frame allocation occurs before the coroutine body executes—the allocator must be discoverable from arguments, not injected via `await_transform`. 
The dispatcher must be active from the first instruction—code before the first `co_await` must run in the correct context, not just code after suspension. - -We show that execution context flows naturally *forward* through async operation chains, eliminating the backward query model that `std::execution` struggles to fix. The result is a simpler abstraction with clear ownership semantics. Our goal is not to replace `std::execution` for GPU workloads. Our goal is to demonstrate that networking deserves first-class design consideration—a purpose-built abstraction rather than adaptation to a framework optimized for different requirements. - ---- - -## 2. Motivation - -### 2.1 Why Not the Networking TS? - -The Networking TS (and its progenitor Boost.Asio) is the de facto standard for asynchronous I/O in C++. It is mature, well-tested, and supports coroutines through completion tokens. Why build something new? - -#### 2.1.1 The Template Tax - -The Networking TS design philosophy—zero-overhead abstraction through templates—incurs costs that compound in large codebases: - -**Every async operation signature includes executor and handler types:** -```cpp -template<class Protocol, class Executor, class MutableBufferSequence, class ReadHandler> -void async_read(basic_socket<Protocol, Executor>& s, - MutableBufferSequence const& buffers, - ReadHandler&& handler); -``` - -**Composed operations propagate these types:** -```cpp -template<class Protocol, class Executor, class Handler> -void async_http_request(basic_socket<Protocol, Executor>& sock, - http::request const& req, - Handler&& handler); -``` - -This creates: -- **N×M template instantiations** for N operations × M executor/handler combinations -- **Header-only dependencies** that must be recompiled on change -- **Binary size growth** that can reach megabytes in complex applications -- **Compile times measured in minutes** for moderate codebases -- **Nested move-construction overhead** at runtime—composed handlers like `read_op<parse_op<user_handler>>` must be moved at each abstraction layer, with costs that compound as nesting depth increases - -#### 2.1.2 The Encapsulation Problem - -The Networking TS
templates expose implementation details through public interfaces: - -```cpp -// User sees: -class http_client -{ -public: - template<class Executor, class Handler> - void async_get(Executor ex, std::string url, Handler&& h); -}; -``` - -The user is forced to know: -- What executor types are valid -- What handler signatures are expected -- That the implementation uses sockets at all - -Platform types (`OVERLAPPED`, `io_uring_sqe`) do not appear in public API signatures—users write portable code. But they are not hidden from translation units. Asio is header-only; its internal operation types publicly inherit from `OVERLAPPED`; including `<asio.hpp>` pulls Windows headers into every compilation unit. The *API* is portable; the *compilation* is not. And the *structure* of the async machinery still leaks through every API boundary. - -**Our approach:** -```cpp -class http_client -{ -public: - task async_get(std::string url); // That's it. -}; -``` - -The implementation—sockets, buffers, executors—lives in the translation unit. The interface is stable. The ABI is stable. Compilation is fast. - -#### 2.1.3 Coroutine-Compatible vs Coroutine-First - -The Networking TS added coroutine support through completion tokens like `use_awaitable`: - -```cpp -co_await async_read(socket, buffer, use_awaitable); -``` - -This adapts callback-based operations for coroutines. It works, but: -- **Double indirection**: Callback machinery wraps coroutine machinery -- **Executor handling is manual**: `co_spawn` and `bind_executor` required -- **Error handling diverges**: Exceptions vs `error_code` vs `expected` -- **Mental model mismatch**: Writing coroutines that think in callbacks - -Our design is **coroutine-first**: the suspension/resumption model is the foundation, not an adapter. Executor propagation is automatic. Type erasure is structural. The callback path (`dispatch().resume()`) is the compatibility layer, not the other way around.
- -#### 2.1.4 When to Use the Networking TS Instead - -The Networking TS remains the right choice when: -- You need callback-based APIs for C compatibility -- Template instantiation cost is acceptable -- You're already invested in the Asio ecosystem -- Maximum performance with zero abstraction is required -- Standardization timeline matters for your project - -Our framework is better suited when: -- Coroutines are the primary programming model -- Public APIs must hide implementation details -- Compile time and binary size matter -- ABI stability is required across library boundaries -- Clean, simple interfaces are prioritized - -### 2.2 Why Not std::execution? - -The C++26 `std::execution` framework (P2300) provides sender/receiver abstractions for asynchronous programming. We analyzed both the base proposal and its evolution to understand how well it addresses networking. - -#### 2.2.1 P2300: Networking Mentioned, Not Addressed - -P2300 references networking and GPU/parallel topics in roughly equal measure. Superficially balanced. However, the *nature* of these references differs: - -| Aspect | GPU/Parallel | Networking | -|--------|--------------|------------| -| Concrete primitives | `bulk` algorithm, parallel scheduler | None — deferred to P2762 | -| Platform integration | CUDA mentioned by name | No IOCP, epoll, or io_uring | -| Serialization | N/A | **Zero strand mentions** | -| Status | Normative specification | Illustrative examples only | - -P2300 explicitly defers networking: - -> "Dietmar Kuehl has proposed networking APIs that use the sender/receiver abstraction (see P2762)." — P2300, Section 1.4.1.3 - -The framework provides composition primitives but **no I/O operations, no buffer management, and no strand serialization**. Networking exists as examples, not as design drivers. 
- -**What P2762 actually proposes:** - -P2762, the proposal P2300 defers to, explicitly chooses the backward query model as "most useful": - -> "Injecting the used scheduler from the point where the asynchronous work is actually used." - -Yet the paper immediately acknowledges the late-binding problem: - -> "When the scheduler is injected through the receiver the operation and used scheduler are brought together rather late, i.e., when `connect(sender, receiver)`ing and when the work graph is already built." - -P2762 also raises per-operation cancellation overhead as a concern—the same issue our socket-level approach solves—but offers no solution: - -> "Repeatedly doing that operation while processing data on a socket may be a performance concern... This area still needs some experimentation and, I think, design." - -P2762 leaves core networking features unspecified: - -| Feature | P2762 Status | -|---------|--------------| -| Scheduler interface | "It isn't yet clear how such an interface would actually look" | -| Strand serialization | Not mentioned | -| Buffer pools | "There is currently no design" | - -This confirms our thesis: networking is being adapted to `std::execution` rather than driving its design. The proposal acknowledges problems with the query model, identifies performance concerns we solve, yet leaves core networking abstractions unaddressed. - -#### 2.2.2 P3826: GPU-First Evolution - -P3826R2, "Fix or Remove Sender Algorithm Customization," reveals where `std::execution`'s complexity originates. The paper identifies a fundamental flaw: - -> "Many senders do not know where they will complete until they know where they will be started." - -The proposed fix adds `get_completion_domain` queries, `indeterminate_domain<>` types, and double-transform dispatching. This machinery exists to select GPU vs CPU algorithm implementations. - -**P3826 contains over 100 references to GPU, parallel execution, and hardware accelerators. 
It contains zero references to networking, sockets, or I/O patterns.** - -| Concern | GPU/Parallel (P3826's Focus) | Networking (Unaddressed) | -|---------|------------------------------|--------------------------| -| Algorithm dispatch | Critical — GPU vs CPU kernel | Irrelevant — one implementation per platform | -| Completion location | May differ from start | Same context or OS completion port | -| Domain customization | Required for acceleration | Unused overhead | - -Networking requires different things entirely: -- **Strand serialization**: Ordering guarantees, not algorithm selection -- **I/O completion contexts**: IOCP, epoll, io_uring integration -- **Executor propagation**: Ensuring handlers run on the correct thread -- **Buffer lifetime management**: Zero-copy patterns, scatter/gather - -None appear in P3826's analysis. - -#### 2.2.3 The Query Model Problem - -Both P2300 and P3826 share a design choice: **backward queries** for execution context. - -```cpp -// std::execution pattern: query sender/receiver for properties -auto sched = get_scheduler(get_env(rcvr)); -auto domain = get_domain(get_env(sndr)); - -// P3826 addition: tell sender where it starts -auto domain = get_completion_domain(get_env(sndr), get_env(rcvr)); -``` - -This requires senders to be self-describing—problematic when context is only known at the call site. P3826 attempts to fix this by passing hints, but the fix adds complexity rather than questioning the query model itself. - -#### 2.2.4 Forward Propagation Alternative - -Our coroutine model propagates context forward: - -```cpp -// Coroutine-first: context flows from caller to callee -run_async(my_executor, my_task()); // Executor injected at root -// await_transform propagates any_executor const& to all children — no queries needed -``` - -The executor is always known because it flows with control flow, not against it. No domain queries. No completion scheduler inference. No transform dispatching. 
- -#### 2.2.5 Observations - -Comparing our networking-first design with `std::execution` reveals significant divergence: - -1. **What we need, they don't have**: Strand serialization, platform I/O integration, buffer management -2. **What they have, we don't need**: Domain-based algorithm dispatch, completion domain queries, sender transforms -3. **Context flow**: They query backward; we inject forward - -The `std::execution` framework may eventually support networking, but its evolution is driven by GPU and parallel workloads. The complexity P3826 adds—to fix problems networking doesn't have—suggests that networking was not a primary design consideration. - -A framework designed from networking requirements produces a simpler, more direct solution. - -#### 2.2.6 The std::execution Tax - -If networking is required to integrate with `std::execution`, I/O libraries must pay a complexity tax: - -- **Query protocol compliance**: Implement `get_env`, `get_domain`, `get_completion_scheduler`—even if only to return defaults -- **Concept satisfaction**: Meet sender/receiver requirements designed for GPU algorithm dispatch -- **Transform machinery**: Domain transforms execute even when they select the only available implementation -- **API surface expansion**: Expose attributes and queries irrelevant to I/O operations -- **Type leakage**: `connect(sender, receiver)` returns a fully-typed `operation_state`, forcing implementation details through every API boundary - -The type leakage deserves emphasis. In the sender/receiver model: - -```cpp -// std::execution pattern -execution::sender auto snd = socket.async_read(buf); -execution::receiver auto rcv = /* ... */; -auto state = execution::connect(snd, rcv); // Type: connect_result_t -``` - -The `connect_result_t` type encodes the full operation state. 
Algorithms that compose senders must propagate these types: - -```cpp -// From P2300: operation state types leak into composed operations -template<class S, class R> -struct _retry_op { - using _child_op_t = stdexec::connect_result_t<S&, _retry_receiver<S, R>>; - optional<_child_op_t> o_; // Nested operation state, fully typed -}; -``` - -This tax exists regardless of whether networking benefits from it. A socket that returns `default_domain` still participates in the dispatch protocol. The P3826 machinery runs, finds no customization, and falls through to the default—overhead for nothing. - -**The question is not whether P2300/P3826 break networking code.** They don't—defaults work. **The question is whether networking should pay for abstractions it doesn't use.** Our analysis suggests the cost is not justified when a simpler, networking-native design achieves the same goals. - -#### 2.2.7 Divergent Standardization Efforts - -WG21 faces a troubling pattern: std::execution continues evolving without addressing networking needs (§2.2.1-2.2.6), while separate proposals ([P3185](https://wg21.link/p3185), [P3482](https://wg21.link/p3482)) advocate replacing proven socket-based designs with the IETF TAPS framework—an abstract architecture with essentially no production deployment after a decade of development. - -Our analysis in [IETF TAPS versus Boost.Asio](https://github.com/cppalliance/wg21-papers/blob/master/ietf-taps-vs-asio.md) documents the risks of framework-first design. History favors proven practice: TCP/IP over OSI, BSD sockets over every proposed replacement, Boost.Asio's twenty years of field experience over speculative architecture. - -This paper demonstrates that a networking-first coroutine framework is both achievable and practical—without waiting for std::execution to address needs it wasn't designed for, and without adopting an untested abstraction model. - -### 2.3 Why Not Wait For P3482 (IETF TAPS)?
- -P3185 and P3482 propose aligning C++ networking with the IETF Transport Services (TAPS) initiative—an abstract architecture where applications specify properties (reliability, ordering, latency) and the transport system selects protocols at runtime. The vision is appealing: protocol agility, connection racing, secure-by-default semantics. The reality is sobering. After a decade of IETF standardization work, only one production implementation exists: Apple's proprietary Network.framework, which predates TAPS and is not fully compliant. The open-source NEAT implementation, funded by the EU's Horizon 2020 program, was abandoned immediately when funding ended in 2018. No sustained community emerged; no living tradition of knowledge survives. - -This pattern mirrors OSI's failure against TCP/IP. Framework-first designs—comprehensive, theoretically elegant, developed through specification processes—consistently lose to pragmatic, use-case-driven designs refined through deployment. BSD sockets, despite their warts, achieved universal adoption because they solved real problems programmers actually had. Asio built on this foundation with twenty years of continuous evolution: C++11 move semantics, executor model refinement, C++20 coroutine support. Each adaptation responded to production feedback, not committee speculation. The burden of proof lies with the replacement: TAPS has accumulated draft revisions while Asio has accumulated deployments. History's verdict on framework-first versus practice-first is unambiguous. - ---- - -## 3. Different Workloads, Different Needs - -### 3.1 What GPU Workloads Need - -GPU and parallel workloads require **algorithm dispatch**—selecting the right implementation for the hardware. A matrix multiply may run on CPU, GPU, or specialized accelerator. The framework must: - -- **Query completion domains**: Where will this work finish? 
(P3826's central concern) -- **Select algorithm implementations**: `bulk` on GPU vs CPU has different code paths -- **Manage hardware heterogeneity**: CUDA, SYCL, OpenMP backends - -P3826 adds `get_completion_domain` queries and double-transform dispatching precisely for this: the sender doesn't know where it runs until connected to a receiver that reveals the target hardware. - -### 3.2 What I/O Workloads Need - -I/O workloads have fundamentally different requirements. A socket read has **one implementation per platform**—there's no GPU vs CPU choice. Instead, I/O needs: - -- **Completion contexts**: Integration with IOCP, epoll, io_uring -- **Strand serialization**: Ordering guarantees for concurrent access -- **Buffer lifetime management**: Zero-copy patterns, scatter/gather -- **Thread affinity**: Handlers must resume on the correct thread - -None of these appear in P3826's analysis. The framework's complexity serves algorithm selection—a problem I/O doesn't have. - -### 3.3 The Executor is Fundamental - -Both workloads share one need: **something that runs work**. Strip away algorithm dispatch, domain queries, and hardware selection. What remains? - -Two operations: -- **`dispatch`**: Run this continuation (inline if safe) -- **`post`**: Queue this independent work (never inline) - -The choice between them is **correctness, not optimization**. Using `dispatch` while holding a mutex invites deadlock. Using `post` for every continuation wastes cycles bouncing through queues. - -The **executor**—a type providing `dispatch` and `post`—is the minimal foundation. Everything else (senders, schedulers, algorithm dispatch) can be built on top. For I/O workloads, nothing else is needed. - -### 3.4 Allocation Control is Essential - -Neither callbacks nor coroutines achieve competitive performance without allocation control. 
Early testing showed that fully type-erased callbacks—using `std::function` at every composition boundary—allocated hundreds of times per invocation and ran an order of magnitude slower than native templates. Non-recycling coroutine configurations perform so poorly, dominated by heap allocation overhead, that they are not worth benchmarking. - -**Recycling is mandatory for performance.** Thread-local recycling—caching recently freed memory for immediate reuse—enables zero steady-state allocations. Callbacks achieve this through "delete before dispatch": deallocate operation state before invoking the completion handler. Coroutines achieve it through frame pooling: custom `operator new/delete` recycles frames across operations. - -But thread-local recycling optimizes for throughput, not all applications. Different deployments have different needs: - -- **Memory conservation**: Embedded systems or resource-constrained environments may prefer bounded pools over unbounded caches -- **Per-tenant limits**: Multi-tenant servers need allocation budgets per client to prevent resource exhaustion -- **Debugging and profiling**: Development builds may want allocation tracking that production builds eliminate - -Coroutines are continuously created—every `co_await` may spawn new frames. An execution model must make allocation a first-class customization point that controls *all* coroutine allocations, not just selected operations. Without this, memory policy becomes impossible to enforce. - ---- - -## 4. Who Owns What? - -### 4.1 The Problem: Propagation Through Chains - -Consider a composed coroutine chain: - -``` -http_client → http_request → write → write_some → socket -``` - -Each coroutine in this chain needs two resources: - -1. **An allocator** for its frame. Where does memory come from? -2. **A dispatcher** for its completion. Where does control flow resume? 
- -The questions seem symmetric but have different answers: - -- Should every frame in the chain use the **same allocator**? Usually yes—the allocation strategy is a deployment decision, not an operation-specific choice. -- Should every step use the **same dispatcher**? Usually yes—the completion context is determined by the outermost caller, not by intermediate operations. - -The challenge is *how* these resources reach each coroutine. In `std::execution`, context flows backward through queries: - -```cpp -// std::execution pattern: query receiver for scheduler -auto sched = get_scheduler(get_env(receiver)); -``` - -P2762 acknowledges the timing problem this creates: - -> "When the scheduler is injected through the receiver the operation and used scheduler are brought together rather late, i.e., when `connect(sender, receiver)`ing and when the work graph is already built." - -By the time the sender knows its scheduler, the operation structure is fixed. Allocation decisions that should have been made at construction time are deferred until connection time—too late for coroutine frame allocation, which occurs before the coroutine body executes. - -We propose a different model: **forward propagation**. The executor and allocator are determined at I/O object construction and flow forward through the call chain. Every coroutine receives both resources through its I/O object argument, available from the first instruction. - -### 4.2 The Executor Decides Allocation Policy - -Memory strategy is a property of the **execution context**, not the I/O operation. A socket doesn't care whether its coroutine frames are pooled, bounded, or tracked—it performs the same read regardless. But the *context* in which that socket runs cares deeply: - -**High-throughput server.** Thread-local recycling pools achieve zero steady-state allocations. The executor maintains per-thread free lists; coroutine frames are recycled immediately upon completion. 
This is the default for production I/O contexts. - -**Real-time audio.** Bounded pools prevent allocation latency spikes. The executor pre-allocates a fixed pool at startup; operations that exceed the pool fail fast rather than blocking on the system allocator. Predictable latency matters more than handling arbitrary load. - -**Multi-tenant isolation.** Per-tenant budgets prevent resource exhaustion. The executor tracks allocations per client; operations that exceed a tenant's quota are rejected before they can starve other tenants. Memory policy becomes a security boundary. - -**Debugging and profiling.** Development builds may want allocation tracking—counting frames, detecting leaks, measuring pool efficiency. Production builds eliminate this overhead entirely. - -The executor is the natural owner of allocation policy because: - -1. **The executor knows the deployment context.** A server process knows its concurrency model, memory constraints, and tenant boundaries. Individual I/O operations do not. - -2. **The executor outlives individual operations.** Recycling requires memory that persists across operations. The executor's lifetime spans the entire I/O workload. - -3. **The executor can provide the allocator at construction time.** When an I/O object is created, the executor supplies an allocator reference. This reference propagates forward through every coroutine that uses the I/O object. - -The result: memory policy is configured once at executor construction, and every operation inherits it automatically. - -### 4.3 The I/O Object Owns Its Executor - -Unlike allocation policy, executor ownership cannot be separated from the I/O object. The coupling is physical: - -**Platform I/O is context-specific.** A socket registered with epoll on thread A cannot have completions delivered to thread B's epoll instance. An IOCP handle belongs to a specific completion port. An io_uring submission queue is tied to a specific ring. 
The I/O object *must* know its completion context—it cannot be injected later. - -P2762 acknowledges this reality in §4.1: - -> "It was pointed out networking objects like sockets are specific to a context: that is necessary when using I/O Completion Ports or a TLS library and yields advantages when using, e.g., io_uring(2)." - -**Safety requires affinity.** Delivering a completion to the wrong context is not merely inefficient—it is undefined behavior. The kernel expects the completion to be consumed by the registered event loop. Violating this expectation corrupts internal state. - -**The call site cannot know.** When `async_http_request` calls `async_read`, it passes a socket reference. The HTTP layer doesn't know—and shouldn't need to know—which event loop the socket uses. That knowledge is encapsulated in the socket itself. - -**Composition must be transparent.** If intermediate operations had to thread executor information through their signatures, every composed operation would need an executor parameter: - -```cpp -// What we want to avoid: -template -task async_http_request(Executor ex, socket& s, request const& req); -``` - -Instead, the socket carries its executor internally. Composed operations pass the socket; the executor comes along for free: - -```cpp -// What we achieve: -task async_http_request(socket& s, request const& req); -``` - -The executor binding happens once, at socket construction. From that point forward, every operation on the socket inherits the correct completion context without explicit parameter passing. - -### 4.4 The I/O Object as Unified Carrier - -The preceding sections establish two ownership rules: - -1. The **executor** decides allocation policy (§4.2) -2. The **I/O object** owns its executor (§4.3) - -Combining these rules, the I/O object becomes a **unified carrier**: it owns the executor binding and carries the allocator reference that the executor provides at construction time. 
- -``` -┌──────────────────────────────────────────────────────────────────┐ -│ Executor │ -│ • Decides allocation policy (pools, limits, tracking) │ -│ • Provides allocator reference at I/O object construction │ -│ • Defines completion context (event loop, strand) │ -└──────────────────────────────────────────────────────────────────┘ - │ - │ constructs with allocator + context - ▼ -┌──────────────────────────────────────────────────────────────────┐ -│ I/O Object │ -│ • Owns executor/dispatcher binding │ -│ • Carries allocator reference from executor │ -│ • Passed as argument to every async operation │ -└──────────────────────────────────────────────────────────────────┘ - │ - │ argument to async operations - ▼ -┌──────────────────────────────────────────────────────────────────┐ -│ Coroutines │ -│ • Detect allocator via io_object concept │ -│ • Receive dispatcher via await_transform injection │ -│ • Both resources available from the first instruction │ -└──────────────────────────────────────────────────────────────────┘ -``` - -Every coroutine in the chain receives both resources through the I/O object argument: - -- **Allocator**: The coroutine's `promise_type::operator new` detects `io_object` on the I/O object parameter and uses the executor's allocator for frame allocation. - -- **Dispatcher**: The promise's `await_transform` extracts the dispatcher from the I/O object and injects it into every `co_await`, ensuring completions resume in the correct context. - -This forward propagation model addresses timing constraints that backward query models cannot satisfy: - -- **Allocation occurs before the coroutine body executes.** The allocator must be discoverable from arguments at `operator new` time—it cannot wait for `await_transform` injection or receiver connection. - -- **The dispatcher must be active from the first instruction.** Code before the first `co_await` must run in the correct context. 
Strands, for example, require serialization from the very beginning, not just after the first suspension. - -The I/O object is the natural carrier because it is always present: every I/O operation takes an I/O object as an argument. No additional parameters are needed. No queries are required. Context flows forward with the data. - ---- - -## 5. Timing Constraints - -### 5.1 Allocator: Before the Body Executes - -Coroutine frame allocation has a fundamental timing constraint: **`operator new` executes before the coroutine body**. When a coroutine function is called, the compiler-generated code allocates the frame first, then begins execution at the initial suspend point. This sequence is fixed by the language. - -```cpp -auto t = my_coro(sock); // operator new called HERE -co_await t; // await_transform kicks in HERE (too late for allocation) -``` - -The allocator must be discoverable at the moment of invocation—from information available in the function call itself. Any mechanism that injects context later (receiver connection, `await_transform`, explicit method calls) arrives too late to influence frame allocation. - -C++ provides exactly one hook at the right time: **`promise_type::operator new`**. The compiler passes coroutine arguments directly to this overload, allowing the promise to inspect parameters and select an allocator: - -```cpp -template -static void* operator new(std::size_t size, Arg0& arg0, Args&...) { - return arg0.get_executor().allocate_frame(size); -} -``` - -The `io_object` concept detects types that provide an executor via `get_executor()`. When the first or second argument satisfies this concept, the promise uses the executor's allocator. Otherwise, a global pool provides the default. - -**Contrast with `std::execution`.** P2762 acknowledges that the receiver-based model brings scheduler and operation together "rather late"—at `connect()` time, after the work graph is already built. 
For coroutines, this timing is fundamentally incompatible with frame allocation. The allocator cannot wait for receiver queries; it must be present in the arguments. - -### 5.2 Dispatcher: From the First Instruction - -The dispatcher constraint is subtler than allocation timing. Within a composed coroutine chain—where each coroutine takes the I/O object by reference and is awaited by a parent with the same affinity—synchronous code runs in the correct context because the caller is already there. - -The problem arises at **task initiation boundaries**: when crossing from non-coroutine code (callbacks, event handlers, timers) into coroutine code via `spawn()` or `start()`. - -```cpp -// Accept handler on the I/O thread—NOT on any strand -void on_accept(socket sock) { - spawn(connection_handler(std::move(sock))); // Task starts HERE -} - -task connection_handler(socket sock) { - setup_connection(); // Running in accept handler's context! - validate_headers(); // Could race with I/O completions - co_await sock.read(buf); // Dispatcher kicks in HERE... too late -} -``` - -The `spawn` call starts the task in the accept handler's execution context. All synchronous code before the first `co_await` runs there—potentially racing with completions that arrive on the socket's strand. The `await_transform` mechanism only takes effect at suspension points; it cannot protect code that executes before any `co_await`. - -**Implicit vs explicit strands.** Not every socket needs strand protection from the first instruction: - -- **Implicit strand**: A freshly-created socket has singular ownership. No completions are pending; no other code has access. The "strand" is implicit in the fact that only one entity can touch the socket. - -- **Explicit strand**: Once the socket is shared—stored in a container, passed to callbacks, accessible from completions—concurrent access becomes possible. A physical strand is needed from the first instruction. 
- -The handoff to the coroutine is the boundary. Once `connection_handler` is spawned, both the coroutine body and I/O completions could execute concurrently. - -**Resolution options.** Several approaches address this constraint: - -- **Eager initial dispatch**: The task suspends at `initial_suspend` and resumes on the I/O object's dispatcher before any user code runs. This guarantees correct context but adds latency for tasks that don't need strand protection. - -- **Caller responsibility**: Document that `spawn` must be called from the correct context. Simple but error-prone—the invariant is not enforced. - -- **Lazy with immediate dispatch**: The task is lazy (returns without executing), but the first action upon `start()` is to dispatch to the correct context before resuming the coroutine body. - -| Resource | Must Be Available | Challenge | -|------------|------------------------------------|------------------------------------| -| Allocator | At frame allocation (before body) | Executor not yet injected | -| Dispatcher | At first instruction | Caller may be in wrong context | - -The allocator constraint is absolute—there is no later opportunity. The dispatcher constraint depends on whether the socket requires explicit strand protection and how the task is initiated. - ---- - -## 6. The Executor - -**Terminology note.** We use the term *executor* rather than *scheduler* intentionally. In `std::execution`, schedulers are designed for heterogeneous computing—selecting GPU vs CPU algorithms, managing completion domains, and dispatching to hardware accelerators. Networking has different needs: strand serialization, I/O completion contexts, and thread affinity. By using *executor*, we signal a distinct concept tailored to networking's requirements. This terminology also honors Chris Kohlhoff's executor model in Boost.Asio, which established the foundation for modern C++ asynchronous I/O. 
- -C++20 coroutines provide type erasure *by construction*—but not through the handle type. `std::coroutine_handle` and `std::coroutine_handle` are both just pointers with identical overhead. The erasure that matters is *structural*: - -1. **The frame is opaque**: Callers see only a handle, not the promise's layout -2. **The return type is uniform**: All coroutines returning `task` have the same type, regardless of body -3. **Suspension points are hidden**: The caller doesn't know where the coroutine may suspend - -This structural erasure is often lamented as overhead, but we recognize it as opportunity: *the allocation we cannot avoid can pay for the type erasure we need*. - -A coroutine's promise can store execution context *by reference*, receiving it from the caller at suspension time rather than at construction time. This deferred binding enables a uniform `task` type that works with any executor, without encoding the executor type in its signature. - -The executor is the minimal foundation for I/O workloads—a type that provides two operations: **dispatch** for continuations and **post** for independent work. Unlike `std::execution`'s scheduler, which is designed for algorithm selection across heterogeneous hardware, the executor is purpose-built for I/O: it manages completion contexts, serialization, and thread affinity. 
- -```cpp -// Abbreviation for clarity of exposition -using coro = std::coroutine_handle<>; - -struct executor_work_queue; - -struct executor_work { - virtual void operator()() = 0; // responsible for deleting this - virtual void destroy() = 0; // to destroy pending (uninvoked) work - -protected: - ~executor_work() = default; - friend executor_work_queue; - executor_work* next_; -}; - -// Abstract base class for executors -struct executor_base { - virtual ~executor_base() = default; - virtual coro dispatch( coro h ) const = 0; - virtual void post( executor_work* w ) const = 0; - - // Frame allocation with default pass-through to global new/delete - virtual void* allocate_frame( std::size_t n ) const { return ::operator new( n ); } - virtual void deallocate_frame( void* p, std::size_t n) const { ::operator delete( p, n ); } - - // affine awaitable dispatcher - coro operator()( coro h ) const { return dispatch( h ); } -}; - -// Executor concept -template< class T > -concept executor = std::derived_from< T, executor_base >; - -// I/O object concept -template< class T > -concept io_object = requires( T const& t ) { - { t.get_executor() } -> std::convertible_to< executor_base const& >; -}; -``` - -### 6.1 Dispatch: Run This Continuation - -`dispatch` schedules a coroutine handle for resumption. If the caller is already in the executor's context, the implementation may resume inline; otherwise, the handle is queued. We use `std::coroutine_handle<>` rather than a templated callable because the coroutine frame is already allocated and the handle already type-erased—both come for free. - -When an I/O context thread dequeues a completion via `epoll_wait`, `GetQueuedCompletionStatus`, or `io_uring_wait_cqe`, it calls `dispatch` to resume the waiting coroutine. The return value enables symmetric transfer: rather than recursively calling `resume()`, the caller returns the handle to the coroutine machinery for a tail call, preventing stack overflow. 
- -Some contexts prohibit inline execution. A strand currently executing work cannot dispatch inline without breaking serialization—`dispatch` then behaves like `post`, queuing unconditionally. - -### 6.2 Post: Queue Independent Work - -`post` queues work for later execution. Unlike `dispatch`, it never executes inline—the work item is always enqueued, and `post` returns immediately. - -Use `post` for: -- **New work** that is not a continuation of the current operation -- **Breaking call chains** to bound stack depth -- **Safety under locks**—posting while holding a mutex avoids deadlock risk from inline execution - -A coroutine-first design reveals a novel choice: the caller allocates work and provides type-erasure by deriving from `executor_work`. The model expects derived classes to delete themselves in `operator()`. This inverts the traditional model where the executor allocates and wraps a callable. The caller knows the work's lifetime and can embed it in existing storage—a coroutine frame, a connection object, a pre-allocated pool. The intrusive `next_` pointer in the base class enables lock-free queues without separate node allocation. - -This ownership model enables an important optimization: a socket can preallocate its work object upon construction. Because the typical allocator, executor, and completion handler types are no longer stored in the operation state, the work object's size is fixed and known at compile time. The preallocated work simply updates its completion handler pointer before each submission. In this case, `operator()` invokes the handler but does not delete itself, `destroy()` does nothing, and the socket deletes the work in its destructor. - -### 6.3 Allocation Policy - -The executor owns allocation policy for coroutine frames. `allocate_frame` and `deallocate_frame` default to global `::operator new` and `::operator delete`. 
Concrete executors override these to provide: - -- **Thread-local recycling pools** for zero steady-state allocations -- **Bounded pools** for real-time systems with predictable latency -- **Per-tenant budgets** to prevent resource exhaustion in multi-tenant servers -- **Allocation tracking** for debugging and profiling - -I/O objects store an `executor_base*` and expose it via `get_executor()`. The promise's `operator new` detects `io_object` on arguments and calls `get_executor().allocate_frame(n)` for frame allocation. - -### 6.4 Type Erasure and Composition - -Executor types are fully preserved at call sites even though they're type-erased internally. This enables zero-overhead composition at the API boundary while maintaining uniform internal representation. The erasure boundary is pushed to where it matters least: internal propagation rather than the hot dispatch path. - -**Composable executor wrappers work naturally:** - -```cpp -template -struct strand { - Ex inner_; - - coro dispatch(coro h) const { - // Serialization logic, then delegate to inner - return inner_.dispatch(h); - } - // ... -}; - -// Full type information at the call site -strand s{pool.get_executor()}; -run_async(s, my_task()); // strand passed by value -``` - -When `run_async` stores the executor in the root task's frame, it preserves the complete type. The `strand` wrapper's serialization logic remains inlinable. Only when the executor propagates to child tasks—as `executor_base const&`—does type erasure occur. 
- -**Executors encode policy, not just parallelism.** A single-threaded `io_context` still benefits from executor customization: - -- **Serialization**: `strand` ensures operations on one connection don't interleave -- **Deferred execution**: `post` vs `dispatch` controls stack depth -- **Instrumentation**: Wrapper executors can measure latency, count operations, or propagate tracing context -- **Prioritization**: Critical control messages can preempt bulk data transfers -- **Testing**: Manual executors enable deterministic unit tests - -**The coroutine advantage.** Traditional callback-based designs embed the executor in the I/O object's type (`basic_socket`), leaking concurrency policy into type identity. Coroutines invert this: the executor enters at `run_async`, propagates invisibly through `executor_base const*`, and reaches I/O operations via `await_transform`. The I/O object participates in executor selection without encoding it in its type: - -```cpp -// Traditional: executor embedded in socket type -basic_socket> sock; - -// Coroutine model: executor supplied at launch -socket sock; // No executor type parameter -run_async(strand{pool.get_executor()}, use_socket(sock)); -``` - -### 6.5 Executor Wrapping Trade-offs - -For completeness, we acknowledge a case where callbacks are superior: executor wrappers that inject behavior around the posted work. - -Callbacks can wrap work without allocation because the wrapper embeds the callable by value into existing operation state. Our model requires allocating a `work` wrapper to inject behavior—one allocation per wrapped post. - -However, most practical executor wrappers—strands, thread pools, priority queues—do not need to wrap the work itself, only manage *when* and *where* it executes. For these, our model pays no penalty. The allocation cost appears only for exotic wrappers that must inject behavior around the work's execution. - ---- - -## 7. 
Propagation Mechanisms - -### 7.1 Allocator: Detection via Arguments - -*TODO: io_object concept, resolution precedence: explicit → I/O object → global.* - -### 7.2 Dispatcher: Forward Flow - -*TODO: await_transform injection, resolution precedence: explicit → I/O object → inline.* - -### 7.3 Why Not Thread-Local State - -*TODO* - ---- - -## 8. Context Propagation vs Backward Queries - -### 8.1 The std::execution Query Model - -*TODO* - -### 8.2 Forward Flow Eliminates Queries - -*TODO* - -### 8.3 The Affine Awaitable Protocol - -*TODO* - ---- - -## 9. The std::execution Tax - -If networking is required to integrate with `std::execution`, I/O libraries must pay a complexity tax regardless of whether they benefit from the framework's abstractions. - -### 9.1 What I/O Libraries Must Implement - -To participate in the sender/receiver ecosystem, networking code must implement: - -- **Query protocol compliance**: `get_env`, `get_domain`, `get_completion_scheduler`—even if only to return defaults -- **Concept satisfaction**: Meet sender/receiver requirements designed for GPU algorithm dispatch -- **Transform machinery**: Domain transforms execute even when they select the only available implementation -- **API surface expansion**: Expose attributes and queries irrelevant to I/O operations - -A socket returning `default_domain` still participates in the dispatch protocol. The P3826 machinery runs, finds no customization, and falls through to the default—overhead for nothing. - -### 9.2 Type Leakage Through connect_result_t - -The sender/receiver model solves a real problem: constructing a compile-time call graph for heterogeneous computation chains. When all types are visible at `connect()` time, the compiler can optimize across operation boundaries—inlining GPU kernel launches, eliminating intermediate buffers, and selecting optimal memory transfer strategies. 
For workloads where dispatch overhead is measured in nanoseconds and operations complete in microseconds, this visibility enables meaningful optimization. - -Networking operates in a different regime. I/O latency is measured in tens of microseconds (NVMe storage) to hundreds of milliseconds (network round-trips). A 10-nanosecond dispatch optimization is irrelevant when the operation takes 100,000 nanoseconds. The compile-time call graph provides no benefit—there is no GPU kernel to inline, no heterogeneous dispatch to optimize. - -Yet the sender/receiver model requires type visibility regardless: - -```cpp -// std::execution pattern -execution::sender auto snd = socket.async_read(buf); -execution::receiver auto rcv = /* ... */; -auto state = execution::connect(snd, rcv); // Type: connect_result_t -``` - -The `connect_result_t` type encodes the full operation state. Algorithms that compose senders must propagate these types: - -```cpp -// From P2300: operation state types leak into composed operations -template -struct _retry_op { - using _child_op_t = stdexec::connect_result_t>; - optional<_child_op_t> o_; // Nested operation state, fully typed -}; -``` - -For networking, this creates the template tax we sought to avoid (§2.1)—N×M instantiations, compile time growth, implementation details exposed through every API boundary—without the optimization payoff that justifies it for GPU workloads. Our design achieves zero type leakage; composed algorithms expose only `task` return types. - -### 9.3 The Core Question - -The question is not whether P2300/P3826 break networking code. They don't—defaults work. The question is whether networking should pay for abstractions it doesn't use. 
- -| Abstraction | GPU/Parallel Need | Networking Need | -|-------------|-------------------|-----------------| -| Domain-based dispatch | Critical | None | -| Completion scheduler queries | Required | Unused | -| Sender transforms | Algorithm selection | Pass-through only | -| Typed operation state | Optimization opportunity | ABI liability | - -Our analysis suggests the cost is not justified when a simpler, networking-native design achieves the same goals without the tax - ---- - -## 10. Conclusion - -We have presented an execution model designed from the ground up for coroutine-driven asynchronous I/O: - -1. **Minimal executor abstraction**: Two operations—`dispatch` and `post`—suffice for I/O workloads. No domain queries, no algorithm customization, no completion scheduler inference. - -2. **Clear responsibility model**: The executor decides allocation policy; the I/O object owns its executor. Both resources propagate forward through I/O object arguments. - -3. **Complete type hiding**: Executor types do not leak into public interfaces. Platform I/O types remain hidden in translation units. Composed algorithms expose only `task` return types. This directly enables ABI stability. - -4. **Forward context propagation**: Execution context flows with control flow, not against it. No backward queries. The affine awaitable protocol injects context through `await_transform`. - -5. **Conscious tradeoff**: One pointer indirection per I/O operation (~1-2 nanoseconds) buys encapsulation, ABI stability, and fast compilation. For I/O-bound workloads where operations take 10,000+ nanoseconds, this cost is negligible. - -The comparison with `std::execution` (§2.2) is instructive: that framework's complexity serves GPU workloads, not networking. P3826 adds machinery to fix problems networking doesn't have—domain-based algorithm dispatch, completion scheduler queries, sender transforms. Our design sidesteps these issues entirely because TCP servers don't run on GPUs. 
- -This divergence suggests that **networking deserves first-class design consideration**, not adaptation to frameworks optimized for heterogeneous computing. The future of asynchronous C++ need not be a single universal abstraction—it may be purpose-built frameworks that excel at their primary use cases while remaining interoperable at the boundaries - ---- - -## References - -1. [N4242](https://wg21.link/n4242) — Executors and Asynchronous Operations, Revision 1 (2014) -2. [N4482](https://wg21.link/n4482) — Some notes on executors and the Networking Library Proposal (2015) -3. [P2300R10](https://wg21.link/p2300) — std::execution (Michał Dominiak, Georgy Evtushenko, Lewis Baker, Lucian Radu Teodorescu, Lee Howes, Kirk Shoop, Eric Niebler) -4. [P2762R2](https://wg21.link/p2762) — Sender/Receiver Interface for Networking (Dietmar Kühl) -5. [P3552R3](https://wg21.link/p3552) — Add a Coroutine Task Type (Dietmar Kühl, Maikel Nadolski) -6. [P3826R2](https://wg21.link/p3826) — Fix or Remove Sender Algorithm Customization (Lewis Baker, Eric Niebler) -7. [Boost.Asio](https://www.boost.org/doc/libs/release/doc/html/boost_asio.html) — Asynchronous I/O library (Chris Kohlhoff) - ---- - -## Acknowledgements - -This paper builds on the foundational work of many contributors to C++ asynchronous programming: - -**Chris Kohlhoff** for Boost.Asio, which has served the C++ community for over two decades and established many of the patterns we build upon—and some we consciously depart from. The executor model in this paper honors his pioneering work. - -**Lewis Baker** for his work on C++ coroutines, the Asymmetric Transfer blog series, and his contributions to P2300 and P3826. His explanations of symmetric transfer and coroutine optimization techniques directly informed our design. - -**Dietmar Kühl** for P2762 and P3552, which explore sender/receiver networking and coroutine task types. 
His clear articulation of design tradeoffs—including the late-binding problem and cancellation overhead concerns—helped crystallize our understanding of where the sender model introduces friction for networking. - -The analysis in this paper is not a critique of these authors' contributions, but rather an exploration of whether networking's specific requirements are best served by adapting to general-purpose abstractions or by purpose-built designs diff --git a/context/promo-notes.md b/context/promo-notes.md deleted file mode 100644 index 2948719..0000000 --- a/context/promo-notes.md +++ /dev/null @@ -1,48 +0,0 @@ -corosio key insight: -coroutine frame allocation cannot be avoided for i/o patterns -the allocation we cannot avoid, pays for the type-erasure we need -the lifetime of parameters passed to your coroutine extends until you finish -type-erasure at the coroutine boundary is thus essentially free (you already paid for the frame) -7:15 -Asio cannot do this, because it was not written coroutines-first -7:16 -so in asio you end up with stupid shit like any_buffers -7:16 -in corosio, your buffer sequence stays in its natural type until the end where it is efficiently dissected into a platform struct -7:16 -in asio, you use any_executor and that gets passed through all the layers. if your executor is big, you pay -7:16 -in corosio, your executor no matter how complex (sizeof can be 500 bytes), it only costs 2 pointers -7:17 -in asio your completion handler grows with each layer of abstraction, struct within struct within struct. the compiler sees through the whole thing, yes, but those memmove of the ever-larger structs add up. 
(edited) -7:17 -in corosio, the "completion handler" is only ever one pointer (std::coroutine_handler) and each layer of abstraction adds just one more pointer -7:18 -5 layers of asio handlers could be 500 bytes, in corosio it is 40 bytes -7:18 -furthermore in asio, the i/o operation is templated on all that bullshit, creating NxM combinatorial explosion of both size and code -7:19 -in corosio, i/o objects operation sizes are known at compile time, not visible outside the TU, and preallocated. no need for a recycling ops allocator -7:19 -now all I need to do is talk about corosio's "negative overhead" -pdimov - 7:40 PM -JUCB -Vinnie - 7:48 PM -JUCB. -7:49 -complex refactoring, handled -http does not depend on boost.buffers, fix cmake and jamfiles, change all includes to corresponding capy buffers includes, rename symbols if needed, compile and fix until successful -7:49 -merging buffers to capy was the right move -7:49 -Oh, here's the cool thing -7:50 -when I finish this, it will show how the C++ committee has a path to standardizing the coroutines-first execution model optimized for networking (capy) and they can leave the networking piece to third parties, sidestepping all the bickering and gnashing of design teeth -7:50 -with this, there's no need to standardize corosio -7:50 -you can write for capy and then just link in your favorite lib -7:51 -ABI-stable by design \ No newline at end of file diff --git a/context/research/affine-extension.md b/context/research/affine-extension.md deleted file mode 100644 index 107879c..0000000 --- a/context/research/affine-extension.md +++ /dev/null @@ -1,276 +0,0 @@ -# Extending the Affine Protocol: Multi-Capability Coordination - -This document analyzes how to extend `await_suspend` with multiple capabilities (dispatcher, stop token, etc.) using an environment-based approach aligned with P2300. 
- ---- - -## Problem Statement - -### The Core Conflict - -The affine awaitables proposal extends `await_suspend` with a second parameter: - -```cpp -auto await_suspend(std::coroutine_handle<> h, Dispatcher const& d); -``` - -If another proposal (e.g., for stop token propagation) uses the same extension point: - -```cpp -auto await_suspend(std::coroutine_handle<> h, StopToken const& st); -``` - -**An awaitable cannot opt into both protocols simultaneously.** Both claim the same parameter position with incompatible types. Each new capability doubles the overload set—with N capabilities, you need 2^N overloads. - -### Why This Matters - -1. **Protocol incompatibility**: Two valuable protocols cannot coexist in the same awaitable -2. **Ecosystem fragmentation**: Libraries must choose which protocol to support -3. **Future-proofing failure**: Any new capability (allocator, priority, deadline, etc.) faces the same problem - ---- - -## Solution: Environment Object - -Instead of separate parameters, pass a single environment that provides multiple capabilities: - -```cpp -auto await_suspend(std::coroutine_handle<> h, Env const& env); -``` - -The awaitable queries for what it needs: - -```cpp -template -auto await_suspend(std::coroutine_handle<> h, Env const& env) { - if constexpr (requires { get_dispatcher(env); }) { - auto& d = get_dispatcher(env); - // use dispatcher for affine resumption - } - if constexpr (requires { get_stop_token(env); }) { - auto& st = get_stop_token(env); - // register for cancellation - } -} -``` - -**Benefits:** -- One parameter forever, infinitely extensible -- Awaitables only pay for what they use -- Aligns with P2300's receiver/environment pattern - ---- - -## Precedent: P3826R2 - -[P3826R2](https://wg21.link/P3826) ("Fix or Remove Sender Algorithm Customization") addresses a parallel problem in the sender/receiver world. 
- -P3826 identifies that senders face the same "blind context" issue: - -> "Many senders do not know where they will complete until they know where they will be started." - -The fix: **pass the receiver's environment when querying the sender**: - -```cpp -// Old (broken): sender doesn't know its context -auto dom = get_domain(get_env(sndr)); - -// New (fixed): tell sender where it will start -auto dom = get_completion_domain(get_env(sndr), get_env(rcvr)); -``` - -### Key Findings - -1. **Environment queries with additional arguments are sanctioned.** P3826 Section 4.1.1: "A query that accepts an additional argument is novel in std::execution, but **the query system was designed to support this usage**." - -2. **The query mechanism is defined.** P3826 changes `env<>::query` to accept variadic arguments. - -3. **Multiple orthogonal queries coexist.** P3826 adds `get_completion_domain` alongside the other completion queries (e.g., `get_completion_scheduler`)—demonstrating that dispatcher + stop token can coexist. 
- ---- - -## Implementation - -### Query CPOs - -```cpp -// Customization point objects (would be standardized) -inline constexpr struct get_dispatcher_t { - template - auto operator()(Env const& env) const - -> decltype(env.query(get_dispatcher_t{})) { - return env.query(get_dispatcher_t{}); - } -} get_dispatcher{}; - -inline constexpr struct get_stop_token_t { - template - auto operator()(Env const& env) const - -> decltype(env.query(get_stop_token_t{})) { - return env.query(get_stop_token_t{}); - } -} get_stop_token{}; -``` - -### Environment Type - -```cpp -// Environment bundling multiple capabilities -template -struct awaitable_env { - Dispatcher const* dispatcher_; - StopToken const* stop_token_; - - // Query interface (P2300-style) - auto query(get_dispatcher_t) const noexcept -> Dispatcher const& { - return *dispatcher_; - } - auto query(get_stop_token_t) const noexcept -> StopToken const& { - return *stop_token_; - } -}; -``` - -### Environment-Injecting Awaiter - -```cpp -// Awaiter that injects the environment into await_suspend -template -struct env_awaiter { - Awaitable awaitable_; - Env env_; - - bool await_ready() { - return awaitable_.await_ready(); - } - - template - auto await_suspend(std::coroutine_handle h) { - // Pass environment to awaitable if it accepts it - if constexpr (requires { awaitable_.await_suspend(h, env_); }) { - return awaitable_.await_suspend(h, env_); - } else { - // Fallback for legacy awaitables - return awaitable_.await_suspend(h); - } - } - - decltype(auto) await_resume() { - return awaitable_.await_resume(); - } -}; -``` - -### Promise with await_transform - -```cpp -template -struct promise_type { - // Storage for capabilities - some_dispatcher dispatcher_; - std::stop_token stop_token_; - - template - auto await_transform(Awaitable&& a) { - using A = std::remove_cvref_t; - - // Create environment bundling both capabilities - using Env = awaitable_env; - Env env{&dispatcher_, &stop_token_}; - - return env_awaiter{ - 
std::forward(a), - env - }; - } - - // ... rest of promise -}; -``` - -### Awaitable Implementation - -An awaitable that wants both capabilities queries the environment: - -```cpp -struct my_async_operation { - bool await_ready() { return false; } - - template - auto await_suspend(std::coroutine_handle<> h, Env const& env) { - // Query for dispatcher (required for affine) - auto& dispatcher = get_dispatcher(env); - - // Query for stop token (optional) - if constexpr (requires { get_stop_token(env); }) { - auto& token = get_stop_token(env); - // Register cancellation callback... - } - - // Start async work, resume via dispatcher - start_async_work([h, &dispatcher]() { - dispatcher(h); // Resume through dispatcher - }); - - return std::noop_coroutine(); - } - - int await_resume() { return result_; } -}; -``` - -### Extensibility - -Adding a third capability (e.g., allocator) requires no changes to existing awaitables: - -```cpp -template -struct extended_env { - Dispatcher const* dispatcher_; - StopToken const* stop_token_; - Allocator const* allocator_; - - auto query(get_dispatcher_t) const noexcept { return *dispatcher_; } - auto query(get_stop_token_t) const noexcept { return *stop_token_; } - auto query(get_allocator_t) const noexcept { return *allocator_; } -}; -``` - -Existing awaitables that only query for `get_dispatcher` and `get_stop_token` continue to work unchanged. - ---- - -## Revised Protocol - -```cpp -// Environment concept (aligned with P2300) -template -concept awaitable_env = requires(E const& e) { - { get_dispatcher(e) } -> dispatcher; -}; - -// Affine awaitable concept (updated) -template -concept affine_awaitable = - awaitable_env && - requires(A a, std::coroutine_handle
<>
h, E const& env) { - a.await_suspend(h, env); - }; -``` - ---- - -## Next Steps - -1. **Coordinate with P2300 authors**: Ensure the environment pattern aligns with `std::execution`'s direction -2. **Engage with stop token proposal authors**: Converge on a unified environment -3. **Consider a foundational paper**: The environment concept for awaitables may warrant its own proposal -4. **Evaluate `queryable` integration**: Determine if affine environments should satisfy P2300's `queryable` concept - ---- - -## References - -- [P3826R2](https://wg21.link/P3826) — Fix or Remove Sender Algorithm Customization -- [P2300R10](https://wg21.link/P2300R10) — `std::execution` -- [P3325R1](https://wg21.link/P3325R1) — A Utility for Creating Execution Environments diff --git a/context/research/any-executor.md b/context/research/any-executor.md deleted file mode 100644 index 5a7840f..0000000 --- a/context/research/any-executor.md +++ /dev/null @@ -1,174 +0,0 @@ -# Type-Erased Executors: Performance Analysis - -Analysis of the performance trade-offs when using a polymorphic `any_executor` -wrapper versus concrete template-based executors, based on N4242 and N4482. - -## Introduction - -N4242 proposes a two-layer executor design: - -1. **Concrete executor types** — templates like `thread_pool::executor_type`, - `strand`, where the compiler sees the full implementation -2. **Polymorphic wrapper** — a type-erased `executor` class for runtime flexibility - -The question arises: can we use only the type-erased wrapper and still get -the performance benefits of templates? The answer is no, and this document -explains why with concrete code examples. 
- -## The Executor Implementations - -```cpp -// Concrete executor - compiler sees everything -struct pool_executor { - thread_pool* pool_; - - template - void dispatch(F&& f) { - if (pool_->running_in_this_thread()) - std::forward(f)(); // inline execution - else - pool_->enqueue(std::forward(f)); - } -}; - -// Type-erased wrapper (like std::function but for executors) -class executor { - struct ops { - void (*dispatch)(void*, void*); - }; - ops const* ops_; - void* target_; - -public: - template - void dispatch(F&& f) { - // Must wrap F into type-erased callable, then indirect call - auto wrapper = [](void* p) { (*static_cast(p))(); }; - ops_->dispatch(target_, /*erased f*/); - } -}; -``` - -## Using Them in a Coroutine - -```cpp -// TEMPLATE VERSION -template -struct final_awaiter { - Executor ex; - std::coroutine_handle<> parent; - - void await_suspend(std::coroutine_handle<>) { - ex.dispatch([h = parent]{ h.resume(); }); - } -}; - -// TYPE-ERASED VERSION -struct final_awaiter { - executor ex; // type-erased - std::coroutine_handle<> parent; - - void await_suspend(std::coroutine_handle<>) { - ex.dispatch([h = parent]{ h.resume(); }); - } -}; -``` - -## What the Compiler Generates - -**Template version** — after inlining, `await_suspend` becomes: - -```cpp -void await_suspend(std::coroutine_handle<>) { - // ex.dispatch(...) fully inlined: - if (ex.pool_->running_in_this_thread()) { - parent.resume(); // direct call - } else { - ex.pool_->enqueue([h = parent]{ h.resume(); }); - } -} -``` - -**Type-erased version** — compiler cannot inline past the function pointer: - -```cpp -void await_suspend(std::coroutine_handle<>) { - // ex.dispatch(...) 
emits: - auto* fn_ptr = ex.ops_->dispatch; // load from memory - (*fn_ptr)(ex.target_, /*wrapped lambda*/); // indirect call - // ^^^ compiler has no idea what fn_ptr points to - // ^^^ cannot inline, cannot optimize, cannot eliminate -} -``` - -## The Optimization That's Lost - -From N4242 §9, the key insight for `defer()`: - -```cpp -// Template: compiler sees this is a continuation, can use thread-local queue -template -void complete_read(Executor& ex) { - ex.defer([this]{ read_loop(); }); - // Compiler inlines defer(), sees thread-local optimization, - // emits: local_queue.push_back(handler) — no lock! -} - -// Type-erased: compiler just sees an indirect call -void complete_read(executor& ex) { - ex.defer([this]{ read_loop(); }); - // Compiler emits: call through function pointer - // Cannot see the thread-local optimization exists -} -``` - -## strand — The Real Cost - -```cpp -// Fully optimized: both strand logic AND inner dispatch inline -strand s(pool.get_executor()); -s.dispatch([]{...}); -// Compiler produces: check strand, check pool thread, call directly - -// One level of indirection -strand s(type_erased); -s.dispatch([]{...}); -// Compiler: inlines strand logic, but inner ex.dispatch() is indirect call - -// Wrapped strand — virtual at boundary -executor ex = strand(pool.get_executor()); -ex.dispatch([]{...}); -// Compiler: indirect call to strand::dispatch, -// but INSIDE that, the pool dispatch is inlined -``` - -## Performance Impact - -From N4242 §9: - -> "On recent hardware we can observe an uncontended lock/unlock cost of some -> 10 to 15 nanoseconds, compared with 1 to 2 nanoseconds for accessing a -> thread-local queue." - -The `defer()` optimization saves ~13ns per call. But with type erasure, the -compiler can't see that `defer()` uses a thread-local queue, so it can't -eliminate the lock. The indirect call itself adds another ~2-5ns (branch -misprediction + icache miss). 
- -**Per-coroutine-switch overhead**: ~15-20ns with type erasure vs ~2ns with templates. - -At 100,000 switches/second: 2ms vs 0.2ms — 10x difference. - -## Conclusion - -Type erasure has its place (runtime polymorphism, ABI boundaries, heterogeneous -containers), but for hot paths like coroutine continuations, templates are -essential for zero-overhead abstraction. - -The N4242 design provides both: use templates by default, type-erase only -when you need the flexibility. - -## References - -- N4242: Executors and Asynchronous Operations, Revision 1 (2014) -- N4482: Some notes on executors and the Networking Library Proposal (2015) diff --git a/context/research/async-op.md b/context/research/async-op.md deleted file mode 100644 index 71a6da5..0000000 --- a/context/research/async-op.md +++ /dev/null @@ -1,259 +0,0 @@ -# Asio's Asynchronous Operation Composition Model - -This document analyzes how Asio's template-driven initiating functions work, -how operations compose, and whether composed operations truly compile down -to a single struct. - -## The Template-Driven Initiating Function Pattern - -Asio's initiating functions follow this pattern: - -```cpp -template -auto async_read_some(MutableBufferSequence buffers, CompletionToken&& token) -{ - return async_initiate( - initiation_object{}, // The actual implementation - token, - std::move(buffers) - ); -} -``` - -Key components: - -1. **`CompletionToken`** - A template parameter that determines how results - are delivered (callback, future, coroutine, etc.) - -2. **`async_result`** - A trait that: - - Defines `completion_handler_type` - the actual handler type - - Defines `return_type` - what the initiating function returns - - Transforms the token into a handler - -3. 
**`async_initiate`** - Connects the initiation logic to the completion - token machinery - -## How Composition Works - -When `ssl_stream::async_read_some` calls the underlying socket's -`async_read_some`, it creates a composed operation: - -```cpp -template -struct ssl_read_op -{ - ssl_stream& stream_; - MutableBuffer buffer_; - Handler handler_; - int state_ = 0; - - template - void operator()(Self& self, error_code ec = {}, size_t bytes = 0) - { - switch(state_++) { - case 0: - // Start SSL handshake/read - stream_.next_layer().async_read_some( - ssl_buffer_, - std::move(self)); // <-- Handler is concrete type, visible! - return; - case 1: - // Process SSL data, maybe loop - if (need_more_data()) { - state_ = 0; - (*this)(self, {}, 0); - return; - } - // Complete - handler_(ec, decrypted_bytes); - } - } -}; -``` - -## The Critical Question: Does It Compile to One Struct? - -**Answer: It depends on how you compose.** - -### Scenario 1: Template Composition (YES - Single Struct Possible) - -When the composed operation **keeps concrete types visible**: - -```cpp -template -void async_ssl_read(ssl_stream& s, buffer& b, Handler&& h) -{ - // Handler type is VISIBLE to compiler - ssl_read_op> op{s, b, std::forward(h)}; - // ... -} -``` - -The compiler sees: - -``` -ssl_read_op - └── contains: socket_read_op> - └── contains: io_uring_op> -``` - -**All nested as ONE aggregate type** - the compiler can inline everything. - -### Scenario 2: Type-Erased Composition (NO - Separate Allocations) - -When using `any_io_executor` or `std::function`: - -```cpp -void async_ssl_read(ssl_stream& s, buffer& b, std::function h) -{ - // Handler type is ERASED - compiler cannot see through it -} -``` - -Each layer becomes a separate allocation/vtable lookup. 
- -## Verification Against Asio's Documentation - -From the [Boost.Asio Composition Documentation](https://www.boost.org/doc/libs/latest/doc/html/boost_asio/overview/composition/compose.html): - -> "The `async_compose` function simplifies the implementation of composed -> asynchronous operations. It **automatically wraps a stateful function -> object** with a conforming intermediate completion handler." - -The key insight from the docs: - -1. **`async_compose` creates a single operation struct** containing: - - Your state machine implementation - - The wrapped completion handler - - Work tracking objects - -2. **Template preservation is essential** - the operation struct remains - templated on the handler type: - -```cpp -template -struct composed_op -{ - // Your implementation object - Implementation impl_; - - // Work guard for the handler's executor - executor_work_guard work_; - - // The actual handler - Handler handler_; -}; -``` - -## What Really Happens at the Call Site - -```cpp -// At call site of ssl_stream::async_read_some -ssl.async_read_some(buffer, [](error_code ec, size_t n) { - // my completion handler -}); -``` - -### Template Model (Fast Path) - -``` -┌────────────────────────────────────────────┐ -│ ssl_read_op │ ← Single allocation -│ ├── state machine (int state_) │ -│ ├── SSL buffers │ -│ ├── reference to next_layer socket │ -│ └── lambda_type handler_ │ (handler inlined) -└────────────────────────────────────────────┘ - │ - │ calls socket.async_read_some(buffer, *this) - │ where *this IS the continuation handler - ▼ -┌────────────────────────────────────────────┐ -│ Compiler sees ssl_read_op IS the handler │ -│ Can inline the entire continuation! 
│ -└────────────────────────────────────────────┘ -``` - -### Type-Erased Model (Slow Path) - -``` -┌─────────────────────────────┐ -│ any_io_executor wrapper │ ← allocation #1 -│ └── vtable* │ -└─────────────────────────────┘ - │ - ▼ -┌─────────────────────────────┐ -│ ssl_read_op │ ← allocation #2 -│ └── std::function<...> │ ← allocation #3 (maybe) -└─────────────────────────────┘ - │ - │ indirect call through vtable - ▼ -┌─────────────────────────────┐ -│ socket layer (also erased) │ ← more indirection -└─────────────────────────────┘ -``` - -## Performance Comparison - -| Aspect | Template Composition | Type-Erased Composition | -|--------|---------------------|------------------------| -| **Single struct?** | Yes - nested templates | No - separate allocations | -| **Inlining** | Full | None across boundaries | -| **Allocations** | Often 0-1 (SBO) | 1 per layer | -| **Call overhead** | ~2-5ns | ~15-25ns per layer | -| **Compiler visibility** | Complete | Opaque vtable | - -## Empirical Evidence - -The benchmark in `test/unit/bench.cpp` demonstrates this difference: - -```cpp -// Template - compiler inlines everything -template_executor ex; -ex.dispatch([&]{ g_sink.write(42); }); // ~2-5ns - -// Type-erased - vtable + indirect call -any_executor ex(template_executor{}); -ex.dispatch([&]{ g_sink.write(42); }); // ~15-25ns -``` - -The composition depth test further proves that template composition scales -linearly (often constant due to inlining) while type-erased scales -multiplicatively with each layer adding ~15-25ns. 
- -## Implications for Library Design - -### When to Use Template Composition - -- Performance-critical paths (hot loops, high-frequency I/O) -- Internal implementation layers -- When compile-time is acceptable - -### When to Use Type Erasure - -- API boundaries to reduce header dependencies -- When runtime polymorphism is genuinely needed -- When compile-time cost is prohibitive - -### The Hybrid Approach - -Libraries like Asio offer both: - -```cpp -// Fast path: concrete executor -tcp::socket::executor_type ex = socket.get_executor(); - -// Flexible path: type-erased -any_io_executor ex = socket.get_executor(); -``` - -This lets users choose the tradeoff appropriate for their use case. - -## References - -- [Boost.Asio Asynchronous Operations](https://www.boost.org/doc/libs/latest/doc/html/boost_asio/reference/asynchronous_operations.html) -- [Boost.Asio Composition](https://www.boost.org/doc/libs/latest/doc/html/boost_asio/overview/composition/compose.html) -- [Boost.Asio Completion Tokens](https://www.boost.org/doc/html/boost_asio/overview/model/completion_tokens.html) -- N4242: Executors and Asynchronous Operations, Revision 1 (2014) diff --git a/context/research/dispatch.md b/context/research/dispatch.md deleted file mode 100644 index b24ed81..0000000 --- a/context/research/dispatch.md +++ /dev/null @@ -1,13 +0,0 @@ -## Context: Callback vs Coroutine Invocation Semantics in Asio - -**Callbacks use immediate invocation on entry.** Asio examples show composed operations launched directly from constructors or completion handlers without dispatch/post. The caller is trusted to understand the execution context. - -**Coroutines use post/dispatch on entry.** `co_spawn` implementation (1.74.0+) begins with `co_await post(executor, ...)` before executing user code. Later versions (1.80+) changed to `dispatch` for optimization when already in correct context. 
- -**Rationale for difference:** Coroutine frames may be allocated and reference executor-specific state before any user code runs. Asio ensures safety by forcing execution onto the correct executor before the coroutine body begins. Callbacks have no such intermediate state - the initiating function runs synchronously and only completions route through the executor. - -**Implication for coroutine library design:** To match callback performance characteristics, provide two launch mechanisms: -1. A safe launch (`dispatch_io`) that posts/dispatches for initial entry from unknown contexts -2. An immediate launch (`invoke_io`) for subsequent operations within a chain where the executor context is already correct - -Once the first async operation completes, the chain remains on the correct executor indefinitely - matching callback behavior where all work after initial launch executes in `io_context::run()` threads. diff --git a/context/research/execution.md b/context/research/execution.md deleted file mode 100644 index 613e68e..0000000 --- a/context/research/execution.md +++ /dev/null @@ -1,318 +0,0 @@ -# Execution Model: post, dispatch, and defer - -Research notes on executor semantics for capy, incorporating material from -N4242 (Executors and Asynchronous Operations) and N4482 (Executor Semantics). - -## The Minimal Executor Concept - -After discussion with Peter Dimov, we concluded the minimal foundational executor API is: - -```cpp -struct executor -{ - template - void post(NullaryCallable&&); - - template - void dispatch(NullaryCallable&&); -}; -``` - -This is the smallest API upon which everything—including senders—can be built. - -## Progress Guarantees - -From N4482, executors provide a **parallel progress guarantee**: submitted work -will eventually execute, but only after its first step begins. This is weaker -than a concurrent guarantee (where work makes progress independently). 
- -| Guarantee | Meaning | Example | -|-----------|---------|---------| -| **Parallel** | Work runs eventually, shares threads | Thread pool, io_context | -| **Concurrent** | Work makes independent progress | new_thread_executor | -| **Weakly parallel** | May need external nudging | Resumable functions on executors | - -Coroutines built on parallel executors get weakly parallel progress—the -executor provides the parallel guarantee, but the coroutine itself may suspend -indefinitely waiting for events. - -## Semantic Definitions - -### post - -**Never blocks the caller pending completion of f.** - -From N4242 §15.9.1: -> The executor shall not invoke f in the current thread of execution prior to -> returning from post. - -- Guaranteed the callable does NOT run before `post()` returns -- Never runs inline, always goes through the queue -- Use for: new independent work - -### dispatch - -**Permitted to block the caller until f finishes.** - -From N4242 §15.9.1: -> The executor may invoke f in the current thread of execution prior to -> returning from dispatch. - -- If called from within the executor's context → execute immediately -- If called from outside → enqueue like `post` -- May block until completion if running inline -- Use for: continuations (resuming suspended work) - -### defer - -**Like post, but hints continuation relationship.** - -From N4242 §15.9.1: -> defer is used to convey the intention of the caller that the submitted -> function is a continuation of the current call context. The executor may -> use this information to optimize or otherwise adjust the way in which f -> is invoked. - -- Always enqueues (never inline) -- Implementation may defer beyond current handler invocation -- Enables thread-local queue optimization (see below) -- Use for: yielding to other queued work, fairness - -## Why Both post and dispatch? 
- -### Correctness, Not Just Optimization - -From N4482 §3.1.1: -> A consequence of dispatch()'s specification is that its use can introduce -> deadlock. This occurs when the caller holds a mutex and f attempts to -> acquire the same mutex. Thus the choice between dispatch() and post() -> impacts **program correctness**. A hint, which by definition need not be -> respected, is an inappropriate way for the caller to express its intention. - -The distinction is **not a hint**—it affects whether your program deadlocks. - -### post alone is insufficient - -With only `post`, every continuation bounces through a queue even when already on the correct executor: - -```cpp -// Already running on executor X -async_op.then([](auto result) { - // With post-only: queued, context switch, runs later - // With dispatch: runs inline, zero overhead -}); -``` - -### dispatch expresses continuation semantics - -When an async operation completes, the handler is a *continuation* of the current logical operation. The completing operation knows: - -- "I'm finishing on context X" -- "The handler wants to run on context Y" -- If X == Y → run it now (dispatch) -- If X ≠ Y → post to Y - -This is the essential optimization that makes coroutines competitive with hand-written state machines. - -## The defer Optimization - -From N4242 §9, consider a chain of async reads where each completes immediately -(data already in kernel buffers): - -**With post (naïve):** -``` -#6 — lock mutex -#7 — dequeue read_loop -#8 — unlock mutex -#9 — call read_loop -#1 — call post -#2 — lock mutex -#3 — enqueue read_loop -#4 — notify condition -#5 — unlock mutex -(repeat) -``` - -Each cycle requires **two lock/unlock pairs** plus a condition variable notification. - -**With defer (optimized):** -``` -#3 — lock mutex -#4 — flush thread-local queue to main queue -#5 — dequeue read_loop -#6 — unlock mutex -#7 — call read_loop -#1 — call defer -#2 — enqueue to thread-local queue (no lock!) 
-(repeat) -``` - -By using a thread-local queue, defer eliminates one lock/unlock pair and avoids -unnecessary thread wakeups. On modern hardware: ~15ns for uncontended lock vs -~2ns for thread-local access. - -## Formal Executor Requirements - -From N4242 §15.9.1, an executor type X must provide: - -| Expression | Semantics | -|------------|-----------| -| `x.dispatch(f, a)` | May invoke f in current thread prior to returning | -| `x.post(f, a)` | Shall not invoke f in current thread prior to returning | -| `x.defer(f, a)` | Same as post, but hints f is a continuation | -| `x.on_work_started()` | Increment outstanding work count | -| `x.on_work_finished()` | Decrement outstanding work count | -| `x.context()` | Return reference to execution_context | - -The allocator parameter `a` allows the executor to use caller-provided memory -for storing the queued function object. - -## Coroutine Affinity Example - -Consider a coroutine chain with affinity: - -```cpp -task c() { co_return; } -task b() { co_await c(); } -task a() { co_await b(); } - -spawn(ex, a().on(ex), handler); // a has affinity, b and c inherit -``` - -### Transition Analysis - -| Transition | Type | Recommended | Rationale | -|------------|------|-------------|-----------| -| a → b (starting b) | Starting child | Symmetric transfer | Initiating new work, already on `ex` | -| b → c (starting c) | Starting child | Symmetric transfer | Same—continuing the chain | -| c → b (c completes) | Continuation | **dispatch** | Resuming suspended parent | -| b → a (b completes) | Continuation | **dispatch** | Resuming suspended parent | - -### Execution Trace with Async I/O - -``` -1. Thread A (ex): a runs, awaits b → symmetric transfer -2. Thread A (ex): b runs, awaits c → symmetric transfer -3. Thread A (ex): c runs, starts async I/O, suspends -4. Thread B (I/O): I/O completes, resumes c -5. Thread B (I/O): c's final_suspend — NOT on ex! - → dispatch: posts continuation to ex -6. 
Thread A (ex): b resumes from queue -7. Thread A (ex): b's final_suspend — already on ex - → dispatch: runs inline (no queue bounce) -8. Thread A (ex): a resumes immediately -``` - -With **dispatch**: Steps 5 posts (wrong context), step 7 runs inline (right context). - -With **post everywhere**: Step 7 would unnecessarily queue. - -With **symmetric transfer everywhere**: Step 5 would violate affinity. - -## capy::task Should Always Dispatch - -For `capy::task`, all uses of the dispatcher are resuming suspended coroutines: - -| Site | What's Happening | Nature | -|------|------------------|--------| -| `final_suspend` | Child done, resume parent | Continuation | -| Async op completes | Resume awaiting task | Continuation | -| `spawn` completion | Task done, call handler | Continuation | - -There's no case where task submits new independent work through the dispatcher. Therefore, the `executor_dispatcher` should use dispatch: - -```cpp -struct executor_dispatcher -{ - executor ex_; - - template - void operator()(F&& f) const - { - if (ex_) - ex_.dispatch(std::forward(f)); // continuation semantics - else - std::forward(f)(); - } -}; -``` - -## When to Use post from await_suspend - -From the perspective of an async operation's completion path: - -### Use dispatch (default) - -- Normal async completion -- Efficient inline execution when on correct context -- The common case - -### Use post when: - -1. **Synchronous completion safety**: Operation might complete before initiation returns - ```cpp - start_async_read([h, &d](Data result) { - // Might be called BEFORE start_async_read returns! - d.post(h); // Safe: never inline - }); - ``` - -2. **Reentrancy**: Completing while holding locks - ```cpp - void complete_all_waiters() { - std::lock_guard lock(mutex_); - for (auto& w : waiters_) { - w.dispatcher.post(w.handle); // User code runs after lock released - } - } - ``` - -3. 
**Stack depth**: Batch-completing many operations - ```cpp - for (auto& op : completed_ops) { - op.dispatcher.post(op.handle); // Constant stack depth - } - ``` - -4. **Fairness**: Yielding to other queued work after heavy computation - -### Decision Framework - -| Situation | Use | -|-----------|-----| -| Normal async completion | dispatch | -| Might complete synchronously | post | -| Completing while holding locks | post | -| Batch-completing many ops | post | -| Want to yield/be fair | post or defer | - -## Relationship to Senders (P2300) - -The sender model's `schedule()` + `start()` decomposes into these primitives: - -```cpp -auto sender = schedule(ex) | then([]{ /* work */ }); -``` - -When started, the scheduler internally uses: -- `dispatch` if already on correct context (inline optimization) -- `post` if not (deferred execution) - -The post/dispatch model is foundational—senders are built on top of it. - -## References - -- N4242: Executors and Asynchronous Operations, Revision 1 (2014) -- N4482: Some notes on executors and the Networking Library Proposal (2015) -- Boost.Asio executor model: https://www.boost.org/doc/libs/release/doc/html/boost_asio/overview/model/executors.html -- P2300: std::execution: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2300r10.html - -## Summary - -- **post** = "here's new independent work" (always queued, never blocks caller) -- **dispatch** = "here's the next step of what I'm doing" (inline if same context, may block) -- **defer** = "queue this continuation" (enables thread-local optimization) - -The choice between dispatch and post is **correctness**, not optimization. -For coroutine continuations, dispatch is correct. Post is the escape hatch -when you need guaranteed deferred execution. 
diff --git a/context/research/klemens-morgenstern.md b/context/research/klemens-morgenstern.md deleted file mode 100644 index eca55f2..0000000 --- a/context/research/klemens-morgenstern.md +++ /dev/null @@ -1,155 +0,0 @@ -## Additional Feedback (2025-12-31) - -### Key Concerns - -1. **await_transform Overload Specificity**: Many promise types have multiple `await_transform` overloads (e.g., `co_await asio::this_coro::executor` returns a noop awaitable). The paper templates `await_transform` making everything affine, but libraries need a concept to determine what should be made affine. Suggested pattern: - ```cpp - template - auto await_transform(Sender&& sndr) noexcept { - return as_awaitable(affine_on(std::forward(sndr), SCHED(*this)), *this); - } - - template A> - auto await_transform(A&& a); - ``` - This allows libraries like Boost.Cobalt or Asio to add support without modifying existing awaitables, and enables compile-time rejection of non-affine awaitables for zero-allocation guarantees. - -2. **Dispatcher Concept Usage**: The `dispatcher` concept exists in the paper but isn't used anywhere. Should be used in constraints. Suggested concept: - ```cpp - template - concept dispatcher = requires (D d, std::coroutine_handle
<>
h) {{d(h)};}; - ``` - This is important if a library author already has a second parameter for `await_suspend` and needs to distinguish it. - -3. **Simplification Opportunity**: Can simplify `d([h] { h.resume(); })` to just `d(h)` since `std::coroutine_handle<>` has an `operator()`. This was already incorporated. - -4. **Concept Enables Both Approaches**: The `affine_awaitable` concept enables: - - **Tiered mode**: Compile-time detection with fallback (paper's emphasis on compatibility) - - **Strict mode**: Compile-time rejection for zero-allocation guarantees (colleague's use case) - Both are valid uses of the same primitive. - -### Implementation Notes - -- The dispatcher being passed by `const&` (not `&&`) was questioned, but confirmed as correct in the protocol. -- Buffer-based allocation for `make_affine` was discussed but deemed not viable for standardization. -- The colleague confirmed the paper looks fine overall, with these details as improvements. - ---- - -## Additional Content from Paper Sections 7.1-7.6 - -### §7.1 Non-Breaking Overload Addition - -Adding `await_suspend(h, d)` alongside existing `await_suspend(h)` is completely non-breaking. The following table shows which overload is selected in each context: - -| Context | Awaitable Type | Overload Selected | -|---------|---------------|-------------------| -| Legacy awaitable (only `await_suspend(h)`) | Non-affine | `await_suspend(h)` (legacy path) | -| Affine awaitable (both overloads) | Affine | `await_suspend(h, d)` (affine path) | -| Legacy code awaiting affine awaitable | Affine | `await_suspend(h)` (backward compatible) | -| Affine-aware code awaiting legacy awaitable | Non-affine | `await_suspend(h)` (fallback to legacy) | - -This ensures that existing code continues to work unchanged, while new affine-aware code can opt into zero-allocation affinity. 
- -### §7.2 Strict Mode: Compile-Time Enforcement - -Strict mode uses `await_transform` constrained with `static_assert` to reject non-affine awaitables at compile time, providing the zero-allocation guarantee: - -```cpp -template A> -auto await_transform(A&& a) { - return affine_awaiter{std::forward(a), &dispatcher_}; -} - -template -auto await_transform(A&& a) { - static_assert(affine_awaitable, context_type>, - "Only affine awaitables are accepted in strict mode"); - return affine_awaiter{std::forward(a), &dispatcher_}; -} -``` - -This compile-time enforcement ensures that non-affine awaitables are rejected before code generation, providing a hard guarantee of zero allocations. - -### §7.3 Tiered vs. Strict: Same Primitives, Different Policies - -The `affine_awaitable` concept enables two distinct usage patterns with the same primitives: - -| Approach | Policy | Use Case | -|----------|--------|----------| -| **Tiered (§6)** | Accept all, optimize what we can | Library adoption, gradual migration | -| **Strict (§7.2)** | Reject non-affine at compile time | Performance-critical systems, embedded | - -Both approaches use the same `affine_awaitable` concept and protocol—the difference is in the policy enforced by `await_transform`: -- **Tiered mode**: Uses `if constexpr` to detect affine awaitables and fall back to `make_affine` for non-affine ones -- **Strict mode**: Uses `static_assert` to reject non-affine awaitables at compile time - -### §7.4 Gradual Library Evolution - -The concept serves as a migration tool with three phases: - -**Phase 1: Status quo** -- Works everywhere -- 1 allocation fallback for non-affine awaitables via `make_affine` - -**Phase 2: Add affine overload** -- Non-breaking addition of `await_suspend(h, d)` to existing awaitables -- Zero-allocation for affine callers -- Legacy callers continue to work unchanged - -**Phase 3: Remove legacy overload** -- Compile-time enforcement via `static_assert` -- Zero-allocation guarantee across the 
entire chain -- Requires all awaitables in the chain to be affine - -This phased approach allows libraries to migrate incrementally without breaking existing code. - -### §7.5 Example: Strict-Mode Task - -Complete `strict_task` implementation demonstrating compile-time enforcement: - -```cpp -template -class strict_task { - struct promise_type { - Dispatcher const* dispatcher_ = nullptr; - - template A> - auto await_transform(A&& a) { - return affine_awaiter{std::forward(a), dispatcher_}; - } - - template - auto await_transform(A&& a) { - static_assert(affine_awaitable, Dispatcher>, - "strict_task only accepts affine awaitables"); - // Unreachable, but satisfies return type requirement - return affine_awaiter{std::forward(a), dispatcher_}; - } - - // ... rest of promise implementation - }; - - // ... rest of task implementation -}; -``` - -This ensures that any attempt to `co_await` a non-affine awaitable in a `strict_task` results in a compile-time error. - -### §7.6 Migration Tool, Not Just Detection - -The concept serves a dual purpose that should be explicitly reframed: - -**Detection (tiered mode)**: `if constexpr` selection -- Detects whether an awaitable is affine at compile time -- Falls back to `make_affine` for non-affine awaitables -- Enables gradual migration and library compatibility - -**Enforcement (strict mode)**: `static_assert` guarantee -- Rejects non-affine awaitables at compile time -- Provides hard zero-allocation guarantee -- Enables performance-critical and embedded use cases - -The same `affine_awaitable` concept enables both patterns—it's not just about detection, but about providing a migration path from today's ecosystem to zero-overhead scheduler affinity. 
- - diff --git a/context/research/mateusz-pusz.md b/context/research/mateusz-pusz.md deleted file mode 100644 index 0c243c3..0000000 --- a/context/research/mateusz-pusz.md +++ /dev/null @@ -1,119 +0,0 @@ -The problem with P3552 is that the synopsis says: - - template - auto await_transform(A&& a); - template - auto await_transform(change_coroutine_scheduler sch); - -Which should work for all the types, but then the interfaces are described as: - - template - auto await_transform(Sender&& sndr) noexcept; - - template - auto await_transform(change_coroutine_scheduler sch) noexcept; - -which adds the constraints to the first overload, causing the compile-time issue that Vinnie correctly points out in his paper. This needs to be fixed indeed. - -In general, the proposal is really good, but it looks a bit like AI-generated text ;-), which some Committee members may not like. -Here are some comments: - -Regarding the code: - -template -auto await_transform(Awaitable&& a) noexcept { - using A = std::remove_cvref_t; - - // Detect types that explicitly opt into being senders - constexpr bool is_explicit_sender = requires { typename A::sender_concept; }; - - if constexpr (same_as) - return std::forward(a); - else if constexpr (is_explicit_sender) - // OPTIMIZED: Explicit senders use continues_on directly - return execution::as_awaitable( - execution::continues_on(std::forward(a), SCHED(*this)), - *this); - else - // FALLBACK: Plain awaitables use the trampoline - return make_affine(std::forward(a), SCHED(*this)); -} - -1. I would encourage changing the `is_explicit` sender to the definition compatible with is-sender. -2. If I am not wrong, the first branch will not cover awaiting the senders on an `inline_scheduler`. Put differently, `co_await sndr` will fail to compile because `sndr` will most likely not provide an awaiter interface or support `operator co_await`. -3. 
It could also be possible to add a branch here that checks `is_ready` here as well, and not do anything at all if it returns `true`. In such cases, the coroutine won't be suspended, so there is no need to worry about its resumption. - -Regarding the code: - -template -auto make_affine(Awaitable&& awaitable, Dispatcher& dispatcher) - -> affinity_trampoline> -{ - if constexpr (is_void_v>) { - co_await get_awaitable(std::forward(awaitable)); - co_await dispatch_awaitable{dispatcher}; - } else { - auto result = co_await get_awaitable(std::forward(awaitable)); - co_await dispatch_awaitable{dispatcher}; - co_return result; - } -} - -4. My understanding is that `get_awaitable` is not needed, as this is what `co_await` does by itself. If Vinni really insists on keeping it, then the correct name for it should be `get_awaiter`. Awaitable - something that we co_await on. Awaiter - a type providing three member functions. -5. It is worth pointing out that with the proposed approach, the `await_resume` will still be executed in another context (thread, scheduler, etc) for asynchronous awaitables (assuming is_ready() == false). Only after that is the execution transferred back to the original context. -6. I didn't test it, but it is worth checking out if `auto&& result = ...`and `co_return std::forward(result);` would not be a better solution (for non-copyable or expensive to copy types). NRVO does not work for coroutines. -7. Instead of `dispatch_awaitable` maybe we can use `as_awaitable(continues_on(dispatcher))`? Adding dispatcher abstraction has its costs. -8. The description below is inaccurate. Probably it should be something like that: - - 1. Suspends initially - - 2. Its awaiter is never ready - - 3. Stores the caller's handle on co-await on its awaiter, uses symmetric control transfer to resume itself, and then co_awaits the provided awaitable - - 4. 
When the inner awaitable completes, dispatches to the scheduler, and returns the value obtained from the awaitable (if any) - - 5. Uses symmetric transfer (final_suspend returns the caller's handle) to resume the original coroutine - -Regarding point "3.2 Simpler Specification" - -9. I don't think that the comparison is fair. In `make_affine` case we have even more complex `await_transform` (although unconstrained). The proposed solution also used "`as_awaitable` conversion". How "No sender-specific handling in await_transform" is true? - -Regarding "3.3 Simpler Mental Model" - -10. "Every co_await on STD::TASK resumes on the scheduler." - -Regarding "3.4 HALO-Friendly Design" - -11. I am not aware of any compiler that can apply HALO for asynchronous awaitables. If you know about such a case, please let me know. For now HALO works great only for generator coroutines. - -Regarding "3.7 Non-Intrusive Third-Party Support" - -12. What is `my_affine_task`? It requires the user to implement `await_transform` in the same way, right? Should the paper propose implementing those changes in `with_awaitable_senders` to make it easier for the users to provide compatible coroutines with CRTP? -13. How this would be done: "This is simpler than attempting to wrap third-party task types, which would require complex metaprogramming to intercept promise type customization points."? We could do await_transform to intercept those, but that does not "intercept promise type customization points". - -Regarding "4.1 Additional Coroutine Frame (When HALO Fails)" - -14. "HALO is well-supported in modern compilers (GCC, Clang, MSVC)". As written above, I did not see any task implementation so far that would benefit from HALO on any compiler. Please let me know if you made it work. - -Regarding "4.3 Specification Complexity" - -15. `dispatch_awaitable` and `get_awaitable` are probably not needed as stated above - -Regarding "5.3 `make_affine` as a Complementary Building Block" - -16. 
`auto await_transform(Awaitable&& a)` shadows the one from the `execution::with_awaitable_senders`. Use using-declaration to "import" overloads from the base class. See my comment above proposing to integrate affinity handling into `with_awaitable_senders`. - -Regarding "5.4 Comparison of Approaches" - -17. I think that chapter can be dropped. Only AI that probably generated it ;-) can "easily" understand it. The purpose and scope of the proposal are clear at this point, and such a comparison is not useful here. - -Regarding "5.5 Non-Intrusive Extension of Third-Party Coroutines" - -18. "One might consider creating wrapper types or standard metaprograms to extend third-party task types with scheduler affinity." I don't see how someone knowing coroutines could consider that. This chapter can be removed as well. - -Regarding "6. Impact on P3552R3" - -19. The entire proposal references P3553R3 and criticizes the approach or provides a diff like in this chapter. This paper was accepted at the plenary meeting and is merged and "dead" now. The paper should refer to the ISO standard draft at this point. - -Regarding "Appendix A: Reference Implementation" - -20. Instead of storing both the value and the exception, `promise_type` should use `std::expected` or `std::variant`. -21. `transfer_to_caller` does not need to keep `caller_`. It may be easily obtained from the `await_suspend` argument. Also, it could inherit from `std::suspend_always`. -22. I would extract `return_value` and `return_void` to separate classes so I don't have to specialize the entire `affinity_trampoline` for `void. See https://github.com/mpusz/mp-coro/blob/main/src/include/mp-coro/bits/task_promise_storage.h. -23. It is a good practice to keep the awaiter interface away from the awaitable via `operator co_await`. 
\ No newline at end of file diff --git a/context/research/n4242.md b/context/research/n4242.md deleted file mode 100644 index 453c567..0000000 --- a/context/research/n4242.md +++ /dev/null @@ -1,4161 +0,0 @@ -Doc. no: N4242 -Date: 2014-10-13 -Revises: N4046 -Reply-To: Christopher Kohlhoff -Executors and Asynchronous Operations, Revision 1 -1. Introduction -N3785 Executors and schedulers, revision 3 describes a framework for executors. Unfortunately, this framework is built around some deliberate design choices that make it unsuited to the execution of fine grained tasks and, in particular, asynchronous operations. Primary among these choices is the use of abstract base classes and type erasure, but it is not the only such issue. The sum effect of these choices is that the framework is unable to exploit the full potential of the C++ language. - -In this document, we will look at an alternative executors design that uses a lightweight, template-based policy approach. To describe the approach in a nutshell: - -An executor is to function execution as an allocator is to allocation. - -This proposal builds on the type traits described in N4045 Library Foundations for Asynchronous Operations, Revision 2 to outline a design that unifies the following areas: - -Executors and schedulers. -Support for resumable functions or coroutines. -A model for asynchronous operations. -A flexible alternative to std::async(). -In doing so, it takes concepts from Boost.Asio, many of which have been unchanged since its inclusion in Boost, and repackages them in a way that is more suited to C++14 language facilities. - -2. Changes in this revision -N4046 introduced a comprehensive library that covered included support for composition and coordination of chains of operations. In this revision we present a minimal subset of the proposal. This includes the facilities required to support a model for asynchronous operations, as required by the networking proposal. 
It also includes features that enable the use cases supported by N3785. - -3. Reference implementation -A standalone reference implementation of the proposed library can be found at http://github.com/chriskohlhoff/executors. This implementation requires a C++14 compiler. - -To better illustrate how the executors library interacts with asynchronous operations, and to allow the facility to be used in production code, the library has been backported into the variant of Asio that stands alone from Boost. This variant is available at https://github.com/chriskohlhoff/asio/tree/master. - -4. A two minute introduction to the library -Run a function asynchronously: - -post([]{ - // ... - }); -Run a function asynchronously, on your own thread pool: - -thread_pool pool; - -post(pool, []{ - // ... - }); - -pool.join(); -Run a function asynchronously and wait for the result: - -std::future f = - post(package([]{ - // ... - return 42; - })); - -std::cout << f.get() << std::endl; -Run a function asynchronously, on your own thread pool, and wait for the result: - -thread_pool pool; - -std::future f = - post(pool, package([]{ - // ... - return 42; - })); - -std::cout << f.get() << std::endl; -Run a function in the future and wait for the result: - -std::future f = - post_after( - std::chrono::seconds(1), - package([]{ - // ... - return 42; - })); - -std::cout << f.get() << std::endl; -5. Library vocabulary -The central concept of this library is the executor as a policy. An executor embodies a set of rules about where, when and how to run a function object. For example: - -Executor type - -Executor rules - -system_executor - -Function objects are allowed to run on any thread in the process. - -thread_pool::executor_type - -Function objects are allowed to run on any thread in the pool, and nowhere else. - -strand - -Run function objects according to the underlying executor's rules, but also run them in FIFO order and not concurrently. 
- -Executors are ultimately defined by a set of type requirements, so the set of executors isn't limited to those listed here. Like allocators, library users can develop custom executor types to implement their own rules. Executors allow us to encapsulate all sorts of additional information and behaviour on a fine-grained basis, such as: - -Priority. -Preferred CPU affinity. -Security credentials or impersonation context. -How exceptions should be handled. -Certain objects may have an associated executor, which specifies how any function objects related to the object should be executed. For example, we may want to say that all event callbacks associated with a network protocol implementation should execute on a particular thread, or that a task should run at a particular priority. The notion of an associated executor allows us to decouple the specification of execution behaviour from the actual point of execution. - -An execution context is a place where function objects are executed. Where executors are lightweight and cheap to copy, an execution context is typically long-lived and non-copyable. It may contain additional state such as timer queues, socket reactors, or hidden threads to emulate asynchronous functionality. Examples of execution contexts include thread_pool, loop_scheduler, a Boost.Asio io_service, and the set of all threads in the process. - -We say that a thread_pool is an execution context, and that it has an executor. The thread pool contains long-lived state, namely the threads that persist until the pool is shut down. The thread pool's executor embodies the rule: run functions in the pool and nowhere else. The thread pool's executor may be obtained by calling its get_executor() member function. - -To submit a function object to an executor or execution context, we can choose from one of three fundamental operations: dispatch, post and defer. These operations differ in the eagerness with which they run the submitted function. 
- -A dispatch operation is the most eager. - -dispatch(ex, []{ ... }); -It means: run the function immediately, in the calling thread, if the rules allow it; otherwise, submit for later execution. - -Executor type - -Executor rules - -Behaviour of dispatch - -system_executor - -Function objects are allowed to run on any thread in the process. - -Always runs the function object before returning from dispatch(). - -thread_pool::executor_type - -Function objects are allowed to run on any thread in the pool, and nowhere else. - -If we are inside the thread pool, runs the function object before returning from dispatch(). Otherwise, adds to the thread pool's work queue. - -strand - -Run function objects according to the underlying executor's rules, but also run them in FIFO order and not concurrently. - -If we are inside the strand, or if the strand queue is empty, runs the function object before returning from dispatch(). Otherwise, adds to the strand's work queue. - -A consequence of calling dispatch() is that, if the executor’s rules allow it, the compiler is able to inline the function object call. - -A post operation, on the other hand, is not permitted to run the function object itself. - -post(ex, []{ ... }); -It means: submit the function for later execution; never run the function object immediately. A posted function is scheduled for execution as soon as possible, according to the rules of the executor. - -Executor type - -Executor rules - -Behaviour of post - -system_executor - -Function objects are allowed to run on any thread in the process. - -Like std::async(), the system executor may allocate std::thread objects to run the submitted function objects. A typical implementation may be to use a hidden system-wide thread pool. - -thread_pool::executor_type - -Function objects are allowed to run on any thread in the pool, and nowhere else. - -Adds the function object to the thread pool's work queue. 
- -strand - -Run function objects according to the underlying executor's rules, but also run them in FIFO order and not concurrently. - -Adds the function object to the strand's work queue. - -Finally, the defer operation is the least eager of the three. - -defer(ex, []{ ... }); -A defer operation is similar to a post operation, in that it means: submit the function for later execution; never run the function object immediately. However, a defer operation also implies a relationship between the caller and the function object being submitted. It is intended for use when submitting a function object that represents a continuation of the caller. - -Executor type - -Executor rules - -Behaviour of defer - -system_executor - -Function objects are allowed to run on any thread in the process. - -If the caller is executing within the system-wide thread pool, saves the function object to a thread-local queue. Once control returns to the system thread pool, the function object is scheduled for execution as soon as possible. - -If the caller is not inside the system thread pool, behaves as a post operation. - -thread_pool::executor_type - -Function objects are allowed to run on any thread in the pool, and nowhere else. - -If the caller is executing within the thread pool, saves the function object to a thread-local queue. Once control returns to the thread pool, the function object is scheduled for execution as soon as possible. - -If the caller is not inside the specified thread pool, behaves as a post operation. - -strand - -Run function objects according to the underlying executor's rules, but also run them in FIFO order and not concurrently. - -Adds the function object to the strand's work queue. - -6. Library examples -6.1. Emulating std::async() -6.2. Active objects -6.3. Activatable objects -6.4. Leader/Followers pattern -6.5. Asynchronous operations -6.6. Pipelines -6.7. Actors -6.8. 
Priority scheduler -In this section we will examine a selection of examples, to see how the proposed executors library supports a range of use cases. - -6.1. Emulating std::async() -The behaviour of std::async() function, when used with std::launch::async, may be trivially emulated as follows: - -template -auto async(F&& f, Args&&... args) -{ - return post( - package( - std::bind(std::forward(f), - std::forward(args)...))); -} -Starting from the inside out, the expression: - -std::bind(std::forward(f), - std::forward(args)...) -creates a function object that will invoke f with the specified arguments. Next: - -package(...) -returns an object that will be lazily converted into a std::packaged_task<>. We could also have used std::packaged_task<> directly, as in: - -std::packaged_task< - std::result_of_t< - std::decay_t(std::decay_t...)>>(...) -In this example, the package() function saves on typing by determining the return type of F for us. Finally: - -post(...) -submits the function object for execution on another thread, and then returns immediately. When we submit a std::packaged_task<>, post() automatically deduces its return type to be the std::future<> type produced by the task. This future object is then returned from our version of async(). Note that, unlike std::async(), the returned future object's destructor will not block. - - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/async_1.cpp - -6.2. Active objects -In the Active Object design pattern, all operations associated with an object are run in its own private thread. - -To implement an active object, we begin by defining a class member that is a thread pool containing a single thread. - -class bank_account -{ - int balance_ = 0; - mutable thread_pool pool_{1}; - // ... -}; -We then define each public member function so that it posts its implementation to the thread pool. - -class bank_account -{ - // ... 
- void deposit(int amount) - { - post(pool_, - package([=] - { - balance_ += amount; - })).get(); - } - // ... -}; -In more detail, to implement an active object operation we begin by defining the body of the function: - -[=] -{ - balance_ += amount; -} -which we then wrap in a lazily created std::packaged_task<>: - -package(...) -Finally, we submit the packaged task to the pool and wait for it to complete. When we submit a std::packaged_task<>, post() automatically deduces its return type to be the std::future<> type produced by the task. We can use this future to block until the operation is complete. - -post(...).get(); - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/bank_account_2.cpp - -6.3. Activatable objects -An Activatable object is a variant of the Active Object pattern where the object does not have a private thread of its own. Instead, it can borrow one of the calling threads to process operations[1]. However, like Active Object, it ensures that all member state changes do not occur on more than one thread at a time. - -To implement an activatable object, we create a strand on the system executor. - -class bank_account -{ - int balance_ = 0; - mutable strand strand_; - // ... -}; -We then define each public member function so that it dispatches its implementation to the strand. - -class bank_account -{ - // ... - void deposit(int amount) - { - dispatch(strand_, - package([=] - { - balance_ += amount; - })).get(); - } - // ... -}; -Recall that a system_executor object embodies this rule: - -Function objects are allowed to run on any thread in the process. - -while a strand embodies this rule: - -Run function objects according to the underlying executor's rules, but also run them in FIFO order and not concurrently. - -Finally, the call to dispatch() means: - -Run the function immediately, in the calling thread, if the rules allow it; otherwise, submit for later execution. 
- -Thus, when we combine system_executor, strand and dispatch(): - -dispatch(strand_, []{ ... }); -we are effectively saying: if the strand is not busy, run the function object immediately. If there is no contention on the strand, latency is minimised. If there is contention, the strand still ensures that the function object never runs concurrently with any other function object submitted through the same strand. - - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/bank_account_3.cpp - -6.4. Leader/Followers pattern -The Leader/Followers design pattern is a model where multiple threads take turns to wait on event sources in order to dispatch and process incoming events. - -Consider an example where a connection handler is responsible for receiving messages from a client via UDP. The Leader/Followers pattern is implemented using a thread_pool object: - -class connection_handler -{ - // ... -private: - udp_socket socket_; - thread_pool thread_pool_; - // ... -}; -and involves the sequence of operations below. - -void connection_handler::receive_and_dispatch() -{ -The leader thread waits for the next message to arrive. - - char buffer[1024]; - std::size_t length = socket_.receive(buffer, sizeof(buffer)); -A new message has arrived. The leader thread promotes a follower to become the new leader. - - std::experimental::post(thread_pool_, - [this]{ receive_and_dispatch(); }); -The now former leader processes the message. - - // Process the new message and pass it to the order management bus. - std::istringstream is(std::string(buffer, length)); - order_management::new_order event; - if (is >> event) - order_management_bus_.dispatch_event(event); -} -When the function returns, the former leader automatically returns to the pool as a follower thread. - - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/trading/server/ - -6.5. 
Asynchronous operations -Asynchronous operations are often chained, and in many cases an object may be associated with two or more chains. For example, an object to manage a connection may contain one chain to do the writing, and another to do the reading: - -class connection -{ - tcp::socket socket_; - mutable_buffers_1 in_buffer_; - mutable_buffers_1 out_buffer_; - - // ... - - void do_read() - { - socket_.async_read_some(in_buffer_, - [this](error_code ec, size_t n) - { - // ... process input data ... - if (!ec) do_read(); - }); - } - - void do_write() - { - // ... generate output data ... - async_write(socket_, out_buffer_, - [this](error_code ec, size_t n) - { - if (!ec) do_write(); - }); - } -}; -When these chains are run on a single-threaded event loop, it is not possible for more than one completion handler to run at any given time. This means that no synchronisation is required to protected shared data. However, if handlers are executed on a thread pool then some form of synchronisation will be required to avoid introducing data races. - -The proposed library provides the strand<> template to synchronise handlers. A strand ensures that completion handlers never run concurrently, and explicit synchronisation (such as a mutex) is still not required to protect shared data. To implement this, we use the one strand for all asynchronous operations associated with the object. - -class connection -{ - tcp::socket socket_; - mutable_buffers_1 in_buffer_; - mutable_buffers_1 out_buffer_; - strand strand_; - - // ... - - void do_read() - { - socket_.async_read_some(in_buffer_, - wrap(strand_, [this](error_code ec, size_t n) - { - // ... process input data ... - if (!ec) do_read(); - })); - } - - void do_write() - { - // ... generate output data ... - async_write(socket_, out_buffer_, - wrap(strand_, [this](error_code ec, size_t n) - { - if (!ec) do_write(); - })); - } -}; -The wrap function is used to associated an executor with an object. 
In this example, we used wrap to associate the strand with each of the lambdas. The wrap function works with any executor or execution context. For example, here we associate a thread pool with a lambda:
- - auto work = make_work(handler); -The asynchronous operation itself is posted outside of the associated executor. This is because we want the line reading to be performed asynchronously with respect to the caller. - - post([&is, work, handler=std::move(handler)]() mutable - { - std::string line; - std::getline(is, line); -Once the asynchronous work is complete, we execute the completion handler via its associated executor. - - // Pass the result to the handler, via the associated executor. - dispatch(work.get_executor(), - [line=std::move(line), handler=std::move(handler)]() mutable - { - handler(std::move(line)); - }); - }); -} - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/async_op_1.cpp - -When composing asynchronous operations, intermediate operations can simply reuse the associated executor of the final handler. - -template -void async_getlines(std::istream& is, std::string init, Handler handler) -{ - // Get the final handler's associated executor. - auto ex = get_associated_executor(handler); - - // Use the associated executor for each operation in the composition. - async_getline(is, - wrap(ex, [&is, lines=std::move(init), handler=std::move(handler)] - (std::string line) mutable - { - if (line.empty()) - handler(lines); - else - async_getlines(is, lines + line + "\n", std::move(handler)); - })); -} -This ensures that all intermediate completion handlers are correctly executed according to the caller's executor's rules. - - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/async_op_2.cpp - -6.6. Pipelines -A pipeline is a sequence of two or more long-running functions, known as stages, with each stage passing data to the next via a queue. The initial stage acts as a source of data, the intermediate stages act as filters, and the final stage as a sink. - -As an example, let us consider a small framework for implementing pipelines. 
we are specifying that the upper stage should run on the thread pool, while reader, filter and writer should use the default behaviour of launching a new thread.
- -To illustrate how executors may be used to facilitate actors, a tiny actor framework is included with the executors reference implementation. This framework is loosely based on the Theron library[2]. - -To implement an actor using this framework, we start by deriving a class from actor: - -class member : public actor -{ - // ... -When constructing an actor, we specify the executor to be used: - - explicit member(executor e) - : actor(std::move(e)) - { - // ... - } -The polymorphic type executor is used to allow the selection of an actor's executor to be delayed until runtime. All of an actor's message handlers are executed according to that policy. This could be a thread pool executor, but we may equally construct actors with an executor that knows about priorities. - -The actor's message handlers are member functions, identified by argument type, and may be arbitrarily registered or deregistered: - - void init_handler(actor_address next, actor_address from) - { - // ... - register_handler(&member::token_handler); - deregister_handler(&member::init_handler); - } -Internally, the actor framework uses a per-actor strand to ensure that the member functions are never called concurrently. - -To send a message between actors we use either actor::send() or actor::tail_send(). The send() operation is implemented in terms of the actor's executor's post() member function. The tail_send() function is a distinct operation and conveys additional information about the caller's intent which may be used to optimise inter-actor messaging. It is implemented in terms of defer(). - - void token_handler(int token, actor_address /*from*/) - { - // ... - tail_send(msg, to); - } - - // ... -}; - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/actor.cpp - -6.8. Priority scheduler -Executor objects are lightweight and copyable to allow us to encapsulate all sorts of additional information and behaviour on a fine-grained basis. 
One use case for this is attaching priorities to function objects or tasks. - -We begin by defining our priority scheduler class as an execution context. Internally, this class uses a priority queue to store pending function objects. - -class priority_scheduler : public execution_context -{ - // ... - -private: - - // ... - - struct item_comp - { - bool operator()( - const std::shared_ptr& a, - const std::shared_ptr& b) - { - return a->priority_ < b->priority_; - } - }; - - std::mutex mutex_; - std::condition_variable condition_; - std::priority_queue< - std::shared_ptr, - std::vector>, - item_comp> queue_; - bool stopped_ = false; -}; -The priority_scheduler class provides a nested class executor_type which satisfies the executor type requirements, and a member function get_executor() to obtain an executor object. On construction, an executor_type object captures a reference to the priority scheduler, as well as the specified priority value. - -class priority_scheduler : public execution_context -{ -public: - class executor_type - { - public: - executor_type(priority_scheduler& ctx, int pri) noexcept - : context_(ctx), priority_(pri) - { - } - - // ... - - private: - priority_scheduler& context_; - int priority_; - }; - - executor_type get_executor(int pri = 0) noexcept - { - return executor_type(*this, pri); - } - - // ... -}; -When a function object is submitted, the executor uses its stored priority to insert the function into the correct position in the priority queue: - -class priority_scheduler : public execution_context -{ -public: - class executor_type - { - public: - - // ... - - template - void post(Func f, const Alloc& a) - { - auto p(std::allocate_shared>(a, priority_, std::move(f))); - std::lock_guard lock(context_.mutex_); - context_.queue_.push(p); - context_.condition_.notify_one(); - } - - // ... - }; - - // ... 
-}; -The priority scheduler's executors can then be used like any other: - -priority_scheduler sched; -auto low = sched.get_executor(0); -auto med = sched.get_executor(1); -auto high = sched.get_executor(2); -// ... -dispatch(low, []{ std::cout << "1\n"; }); -dispatch(med, []{ std::cout << "2\n"; }); -dispatch(high, []{ std::cout << "3\n"; }); - -See complete example at: -https://github.com/chriskohlhoff/executors/blob/master/src/examples/executor/priority_scheduler.cpp - -7. Summary of library facilities -Header - -Name - -Description - -type_traits - -Class template handler_type - -Transforms a completion token into a completion handler. - -type_traits - -Class template async_result - -Determines the result of an asynchronous operation’s initiating function. - -type_traits - -Class template async_completion - -Helper to simplify implementation of an asynchronous operation. - -memory - -Class template associated_allocator - -Used to determine a handler’s associated allocator. - -memory - -Function get_associated_allocator - -Obtain a handler’s associated allocator. - -executor - -Class template execution_context - -Base class for execution context types. - -executor - -Class template associated_executor - -Used to determine a handler’s associated executor. - -executor - -Function get_associated_executor - -Obtain a handler’s associated executor. - -executor - -Class template executor_wrapper - -Associates an executor with an object. - -executor - -Function wrap - -Associate an executor with an object. - -executor - -Class template executor_work - -Tracks outstanding work against an executor. - -executor - -Function make_work - -Create work to track an outstanding operation. - -executor - -Class system_executor - -Executor representing all threads in system. - -executor - -Class executor - -Polymorphic wrapper for executors. - -executor - -Functions dispatch, post and defer - -Execute a function object. 
Executor adapter that runs function objects non-concurrently and in FIFO order.
- -Let us start by reviewing a couple of the core interfaces of the Java executor framework: http://docs.oracle.com/javase/7/docs/api/java/util/concurrent/package-summary.html - -First, we have interface Executor. This interface provides a way of submitting a Runnable (i.e. the equivalent of a function object) for execution, and it decouples the submission from the concrete mechanism which runs the function. - -Second, we have interface ExecutorService. This extends Executor, i.e. ExecutorService is-a Executor. It adds some additional functionality, such as the ability to request that it be shut down. - -A thread pool is-a ExecutorService. A fork/join pool is-a ExecutorService. An ExecutorService represents a heavyweight entity where Runnable items are run. - -A SerialExecutor is-a Executor. A thread-per-task executor is-a Executor. An executor represents a policy. Where there is a customisation point (as in the ExecutorCompletionService class) it is specified as an Executor. - -When we want to create our own policy, we do it by implementing the Executor interface. Our Executor policy object can be short lived, or it can be long lived. As we are using Java, the object is newed and we let the garbage collector take care of it. In fact, we don't really have a choice. - -Java is not C++. Java references are not C++ references, nor are they C++ pointers. We do not have the garbage collector to clean up after us. Yet, and this is especially true of concurrent code, correctly managing object lifetime is critical. How do we address this in C++? The idiomatic approach is to use value semantics. - -Thus our Executor policy object should use value semantics. We should be able to copy it and move it freely. Where a particular concrete Executor uses some allocated resource, the constructors and destructors can manage the resource lifetime, just as we do in other standard library components. - -Of course, we do want to be able to use a heavyweight thread pool as an Executor. 
In Java, the thread pool is-a ExecutorService which is-a Executor, so we are able to use the heavyweight object in the same way as a lightweight one. Once again, this is because of Java's reference semantics, where basically all objects are treated the same, whether light or heavy. - -In C++, however, our heavyweight thread pool may be best represented by a non-copyable type. This presents a challenge: how do we establish a pattern where we can pass either a noncopyable type or a type with value semantics? That is, how can we have an interface where we can pass either a heavyweight ExecutorService or a lightweight Executor? - -The solution is to change the relationship between ExecutorService and Executor. Rather than saying an ExecutorService is-a Executor, we instead say an ExecutorService has-a Executor. Every ExecutorService has an associated lightweight Executor policy which encapsulates the submission logic. This lightweight Executor provides the necessary value semantics. - -Thus we can see that this proposal's "executor" is in fact the same concept as the Java Executor. It is just that it is packaged in a way that is more idiomatic C++, i.e. it uses value semantics. This proposal's "execution context" concept is the equivalent of Java's ExecutorService. The executor is the "how", a policy, and it logically performs the execution. The execution context or ExecutorService is the "where", a venue if you like. - -As we are using C++, and not Java, we get the same level of abstraction but with the benefits of compile time polymorphism, inlining, control over memory use, and using fewer allocations (i.e. we create less garbage). The reasons we are using C++ in the first place. - -As we can see, the separation of executor from execution context clearly exists in the prior art represented by Java. 
Clearly there are times when we want to guarantee that a function will not run in the calling thread, such as when we want to launch a long running function.
There are times when it is to our advantage to allow a function to run in the calling thread. Some examples: - -When using a strand (or a serial executor, in Java terminology) to ensure non-concurrent function execution. If the strand is not contended, we want to execute the function in the current thread. This will minimise latency and avoid the cost of a context switch. -At the end of an asynchronous operation. We are unwrapping layers of composition and handing the result back through a stack of callbacks. A callback needs to run on its correct executor, but we do not want to unnecessarily incur a context switch or a cycle through the scheduler, as doing so significantly adds to the latency in processing the event. -Interestingly, the specification of future::then (now removed from the concurrency TS) that took an executor would also suffer from this extra cost unless it had access to dispatch semantics. - -The defer operation, like post, does not allow the function to run in the calling thread. Where it differs is in the expression of the intent of the caller. Using defer states that there is a relationship between the caller and callee, such as being consecutive links in a chain of function objects. An executor can make use of this to do smarter, more efficient scheduling. - -For example, consider a chain of asynchronous read operations on a socket: - -void read_loop(Socket socket, Buffer buffer) -{ - async_read(socket, buffer, - [&](error_code, size_t n) { - process_data(buffer, n); - read_loop(socket, buffer); - }); -} -where an individual read operation is implemented something like this: - -template -void async_read(Socket socket, Buffer buffer, Handler handler) -{ - // Perform a speculative read first. - error_code ec; - size_t n = non_blocking_read(socket, buffer, ec); - if (ec != would_block) - { - // Read completed immediately, post handler. 
- ex = get_associated_executor(handler); - post(ex, [=]{ handler(ec, n); }); - } - else - { - // Wait for socket to become readable. - // ... - } -} -In certain circumstances the read operation will always complete immediately, such as when the data is already available in the kernel buffers. When this occurs, the sequence of operations is essentially equivalent to: - -void read_loop(socket, buffer) -{ - // ... - ex.post([&]{ // #1 - read_loop(socket, buffer); - }); - // ... -} -Let us assume that our executor ex uses a thread pool with a mutex-protected queue: - -class my_thread_pool -{ -public: - class executor_type - { - public: - // ... - - template - void post(Func f, const Alloc& a) - { - auto p(std::allocate_shared>(a, std::move(f))); - std::lock_guard lock(pool_.mutex_); // #2 - pool_.queue_.push_back(std::move(p)); // #3 - pool_.condition_.notify_one(); // #4 - // #5 - } - - // ... - }; - - // ... - - void run() - { - for (;;) - { - std::unique_lock lock(mutex_); // #6 - condition_.wait(lock, [&]{ !queue_.empty(); }); - auto p(std::move(queue_.front())); // #7 - queue_.pop_front(); - lock.unlock(); // #8 - p->execute_(p); // #9 - } - } - -private: - std::mutex mutex_; - std::condition_variable condition_; - std::deque> queue_; -}; -There are two performance issues at play here. First, each "cycle" of read_loop involves two lock/unlock pairs. Second, a condition variable may be used to wake a sleeping thread when the queue is non-empty. If we step through the code we will see the following: - -#6 — lock -#7 — dequeue read_loop -#8 — unlock -#9 — call read_loop -#1 — call post -#2 — lock -#3 — enqueue read_loop -#4 — notify -#5 — unlock -(start of next cycle) -#6 — lock -#7 — dequeue read_loop -#8 — unlock -#9 — call read_loop -... -On the other hand, with defer we are telling the executor that the submitted function is a continuation of the current one. 
That is, the executor does not have to eagerly schedule the function because we have told it that one function follows the other. - -void read_loop(socket, buffer) -{ - // ... - ex.defer([&]{ // #1 - read_loop(socket, buffer); - }); - // ... -} - -class my_thread_pool -{ -public: - class executor_type - { - public: - // ... - - template - void defer(Func f, const Alloc& a) - { - if (pool_.thread_local_queue_) - { - auto p(std::allocate_shared>(a, std::move(f))); - pool_.thread_local_queue_->push_back(std::move(p)); // #2 - } - else - post(std::move(f), a); - } - - // ... - }; - - // ... - - void run() - { - std::deque> local_queue; - thread_local_queue_ = &local_queue; - for (;;) - { - std::unique_lock lock(mutex_); // #3 - while (!local_queue.empty()) // #4 - { - queue_.push(std::move(local_queue.front())); - local_queue.pop_front(); - } - condition_.wait(lock, [&]{ !queue_.empty(); }); - auto p(std::move(queue_.front())); // #5 - queue_.pop_front(); - lock.unlock(); // #6 - p->execute_(p); // #7 - } - } - -private: - std::mutex mutex_; - std::condition_variable condition_; - std::deque> queue_; - static thread_local std::deque>* thread_local_queue_; -}; -Now when we step through the code: - -#3 — lock -#4 — copy contents of thread-local queue to main queue -#5 — dequeue read_loop -#6 — unlock -#7 — call read_loop -#1 — call defer -#2 — enqueue read_loop to thread-local queue -(start of next cycle) -#3 — lock -#4 — copy contents of thread-local queue to main queue -#5 — dequeue read_loop -#6 — unlock -#7 — call read_loop -... -we see that we have eliminated one lock/unlock pair, and we also no longer need to wake another thread. We are able to do this because of the additional information imparted by defer. - -On recent hardware we can observe an uncontended lock/unlock cost of some 10 to 15 nanoseconds, compared with 1 to 2 nanoseconds for accessing a thread-local queue. 
There is also a significant (and often larger) benefit in avoiding the unnecessary thread wakeup and the ensuing lock contention, particularly when dealing with bursty traffic profiles. Either way, this is a latency win. - -With asynchronous operations, the rules are that if an operation completes immediately it posts the result (rather than dispatch, which may result in unfairness, starvation or stack overflow). If it finishes later, it dispatches the result (to minimise latency). - -By default, an individual low-level asynchronous operation, such as async_read shown above, doesn't know if the operation represents a continuation of the current function, or a new fork in the control flow. Either one is possible, so we conservatively assume that every operation represents a new fork and use post. - -However, once we move to a higher layer of abstraction, like a composed operation to read a message frame, we can start to make certain assertions. We know that within the operation it consists of a single chain of asynchronous reads. - -As an example, let us consider a hypothetical composed operation to read a message frame, implemented in terms of async_read above. Each message frame consists of a header, a body in several chunks, and a trailer. In this scenario, the header and body are immediately available in the kernel buffers, but we have to wait for the trailer to arrive. The sequence of executor operations used by the asynchronous chain looks like this: - -header available immediately → post() -first body chunk available immediately → post() -second body chunk available immediately → post() -... waiting for trailer ... -trailer available → dispatch() -One of the motivating reasons for having lightweight, copyable executors, distinct from the execution context, is that they let us remap the executor operations as required. Thus, within the composed operation we can remap post to defer. 
We can do this with a lightweight wrapper around the composed operation's handler's associated executor: - -template -class remap_post_to_defer -{ - ... - template - void post(F f, const A& a) - { - ex_.defer(std::move(f), a); - } - ... - Executor ex_; -}; -We can then apply this wrapper to optimise the intermediate steps of the chain: - -header available immediately → post() -first body chunk available immediately → defer() -second body chunk available immediately → defer() -... waiting for trailer ... -trailer available → dispatch() -If we had a limited vocabulary that only consisted of dispatch: - -header available immediately → dispatch() -first body chunk available immediately → dispatch() -second body chunk available immediately → dispatch() -... waiting for trailer ... -trailer available → dispatch() -then traffic bursts can lead to unfairness and starvation. We are susceptible to denial of service attacks. - -If our vocabulary only consisted of post: - -header available immediately → post() -first body chunk available immediately → post() -second body chunk available immediately → post() -... waiting for trailer ... -trailer available → post() -then every operation in the chain can incur otherwise avoidable synchronisation costs, context switches, and cycles through the scheduler, resulting in higher latency. - -If our vocabulary only consisted of defer: - -header available immediately → defer() -first body chunk available immediately → defer() -second body chunk available immediately → defer() -... waiting for trailer ... -trailer available → defer() -then we almost get away with it, apart from the additional latency introduced by defer at the end of the operation. However, we are also limiting the opportunities for concurrency. 
This may not be an issue in this example with a single chain of operations, but can be a problem where your asynchronous control flow really does fork, such as in a typical accept "loop": - -void handle_accept() -{ - new_socket->start(); // starts asynchronous reads and writes - async_accept(..., &handle_accept); // accepts next connection -} -Thus we need all three operations to complete the set: - -post — the default choice, guaranteeing non-blocking calls and maximising concurrency -dispatch — for minimising latency when we are prepared to accept blocking -defer — to link sequences of related operations -Note that, when implementing the executor type requirements, it is perfectly fine to start by implementing dispatch and defer in terms of post. This is in keeping with the specified semantics. Then, we can optimise the implementation of these functions as we are able to. - -However, are these three operations sufficient? Might there be more things that a user wants to communicate to the executor, about how a function or task should be launched? For example, a priority or a hard real-time deadline. - -The proposed library meets these needs by giving a function object or task an associated executor. As lightweight, copyable objects, executors allow us to encapsulate all sorts of additional information and behaviour on a fine-grained basis, such as priority. The associated executor determines how it should be executed, and the point of association may be distant in time and space from the point where a function is submitted using dispatch, post and defer. - -10. Impact on the standard -This is a pure library proposal. It does not add any new language features, nor does it alter any existing standard library headers. It makes additions to experimental headers that may also be modified by other Technical Specifications. - -This library can be implemented using compilers that conform to the C++14 standard. 
An implementation of this library requires operating system-specific functions that lie outside the C++14 standard. - -11. Relationship to other proposals -This proposal builds on the type traits defined in N4045 Library Foundations for Asynchronous Operations. This paper is intended as an alternative proposal to N3785 Executors and schedulers. - -A substantial subset of the executors library specified below is a prerequisite for the networking library. For this reason, the networking library's proposed text also incorporates a specification of these facilities. - -12. Conclusion -The type traits introduced in N4045 Library Foundations for Asynchronous Operations define an extensible asynchronous model that can support a variety of composition methods, including: - -Callbacks, where minimal runtime penalty is desirable. -Futures, and not just std::future but also future classes supplied by other libraries. -Coroutines or resumable functions, without adding new keywords to the language. -The library introduced in this paper applies this asynchronous model, and its design philosophy, to executors. Rather than a design that is restricted to runtime polymorphism, we can allow users to choose the approach that is appropriate to their use case. - -13. Acknowledgements -The author would like to thank Jamie Allsop, Arash Partow and Dietmar Kühl for providing feedback, corrections and suggestions on both the library implementation and this proposal. - -14. Appendix: Design issues in N3785 -14.1. Use of inheritance and polymorphism -14.2. Use of std::function -14.3. Scheduled work -14.4. Exception handling -14.5. Inline executors and the single add function -14.1. Use of inheritance and polymorphism -The interface is based on inheritance and polymorphism, rather than on templates, for two reasons. 
First, executors are often passed as function arguments, often to functions that have no other reason to be templates, so this makes it possible to change executor type without code restructuring. Second, a non-template design makes it possible to pass executors across a binary interface: a precompiled library can export a function one of whose parameters is an executor. - -As we will see in this proposal, a template-based design does not preclude the inclusion of a runtime polymorphic wrapper. Such a wrapper still allows users to write non-template code for use with executors, and makes it possible to pass executors across a binary interface. On the other hand, it is not possible to undo the performance impact of a type-erased interface. - -The cost of an additional virtual dispatch is almost certainly negligible compared to the other operations involved. - -This claim might be true when passing coarse-grained tasks across threads. However, use cases for executors are not limited to this. As outlined in N4045, composition of asynchronous operations may entail multiple layers of abstraction. The ability to leverage function inlining is a key part of delivering a low abstraction penalty, but the compiler is unable to see through a virtual interface. - -14.2. Use of std::function -Most fundamentally, of course, executor is an abstract base class and add() is a virtual member function, and function templates can’t be virtual. Another reason is that a template parameter would complicate the interface without adding any real generality. In the end an executor class is going to need some kind of type erasure to handle all the different kinds of function objects with void() signature, and that’s exactly what std::function already does. - -By forcing type erasure at the executor interface, an executor implementer is denied the opportunity to choose a more appropriate form of type erasure. For example, an implementer may wish to store pending work items in a linked list. 
One theoretical advantage of a template-based interface is that the executor might sometimes decide to execute the work item inline, rather than enqueueing it for asynchronous execution, in which case it could avoid the expense of converting it to a closure.
Some standard functionality, like sleep_until and sleep_for, is templatized to deal with arbitrary duration and time_point specializations. That’s not an option for an executor library that uses virtual functions, however, since virtual member functions can’t be function templates. There are a number of possible options: - -1. Redesign the library to make executor a concept rather than an abstract base class. We believe that this would be invention rather than existing practice, and that it would make the library more complicated, and less convenient for users, for little gain. - -2. Make executor a class template, parameterized on the clock. As discussed above, we believe that a template-based design would be less convenient than one based on inheritance and runtime polymorphism. - -3. Pick a single clock and use its duration and time_point. - -We chose the last of those options, largely for simplicity. - -Unfortunately, N3785 chooses system_clock as that single clock. As the system clock is susceptible to clock changes it may be inappropriate for use cases that require a periodic timer. In those instances, steady_clock is the better choice. - -In any case, by decoupling timers from executors, this proposal provides timer operations that are indeed templates, and can work with arbitrary clock types. - -14.4. Exception handling -A more interesting question is what happens if a user closure throws an exception. The exception will in general be thrown by a different thread than the one that added the closure or the thread that started the executor, and may be far separated from it in time and in code location. As such, unhandled exceptions are considered a program error because they are difficult to signal to the caller. The decision we made is that an exception from a closure is ill-formed and the implementation must call std::terminate. - -Rather than apply a blanket rule, this proposal includes exception handling as part of an executor’s policy. 
While std::terminate is likely to be the best choice for unhandled exceptions inside a thread pool, users may prefer greater control when using something like loop_executor. For example, the approach taken by Boost.Asio’s io_service and this proposal’s loop_scheduler is to allow exceptions to escape from the “event loop”, where the user can handle them as appropriate. - -14.5. Inline executors and the single add function -[...] Inline executors, which execute inline to the thread which calls add(). This has no queuing and behaves like a normal executor, but always uses the caller’s thread to execute. - -Since the executor interface is defined by an abstract base class, code that calls the add() function has to assume that the underlying executor may execute the function object inline. As a consequence, extra care must be taken in situations such as: - -If using mutexes, avoiding calls to add() while holding the lock. -If iterating over a container and calling add() for each element, ensuring the added function object cannot invalidate the iterators. -This proposal instead makes an explicit distinction between operations that can execute inline, and those that cannot. The library user is then able to choose the appropriate operation for their use case. - -15. Appendix: Proposed text -15.1. Header synopsis -15.2. Class template handler_type -15.2.1. handler_type members -15.3. Class template async_result -15.3.1. async_result members -15.4. Class template async_completion -15.4.1. async_completion members -15.5. Header synopsis -15.6. Class template associated_allocator -15.6.1. associated_allocator members -15.7. Function get_associated_allocator -15.8. Header synopsis -15.9. Requirements -15.9.1. Executor requirements -15.9.2. Service requirements -15.10. Class execution_context -15.10.1. execution_context constructor -15.10.2. execution_context destructor -15.10.3. execution_context operations -15.10.4. execution_context protected operations -15.10.5. 
execution_context globals -15.11. Class execution_context::service -15.11.1. execution_context::service constructors -15.11.2. execution_context::service observers -15.11.3. execution_context::service operations -15.12. Class template is_executor -15.13. Executor argument tag -15.14. uses_executor -15.14.1. uses_executor trait -15.14.2. uses-executor construction -15.15. Class template associated_executor -15.15.1. associated_executor members -15.16. Function get_associated_executor -15.17. Class template executor_wrapper -15.17.1. executor_wrapper constructors -15.17.2. executor_wrapper access -15.17.3. executor_wrapper invocation -15.17.4. Class template specialization async_result -15.17.5. Class template specialization associated_allocator -15.17.6. Class template specialization associated_executor -15.18. Function wrap -15.19. Class template executor_work -15.19.1. executor_work constructors -15.19.2. executor_work destructor -15.19.3. executor_work observers -15.19.4. executor_work modifiers -15.20. Function make_work -15.21. Class system_executor -15.21.1. system_executor operations -15.21.2. system_executor comparisons -15.22. Class bad_executor -15.22.1. bad_executor constructor -15.23. Class executor -15.23.1. executor constructors -15.23.2. executor assignment -15.23.3. executor destructor -15.23.4. executor modifiers -15.23.5. executor operations -15.23.6. executor capacity -15.23.7. executor target access -15.23.8. executor comparisons -15.23.9. executor specialized algorithms -15.24. Function dispatch -15.25. Function post -15.26. Function defer -15.27. Header synopsis -15.28. Class template strand -15.28.1. strand constructors -15.28.2. strand assignment -15.28.3. strand destructor -15.28.4. strand operations -15.28.5. strand comparisons -15.29. Header synopsis -15.30. Function dispatch_at -15.31. Function post_at -15.32. Function defer_at -15.33. Function dispatch_after -15.34. Function post_after -15.35. Function defer_after -15.36. 
Header synopsis -15.37. Class template use_future_t -15.37.1. use_future_t constructors -15.37.2. use_future_t members -15.37.3. use_future_t traits -15.38. Class template specialization async_result for packaged_task -15.39. Class template packaged_handler -15.39.1. packaged_handler constructors -15.39.2. packaged_handler operations -15.39.3. Class template specialization async_result -15.40. Class template packaged_token -15.40.1. packaged_token constructors -15.40.2. packaged_token operations -15.41. Function package -15.42. Header synopsis -15.43. Class thread_pool -15.43.1. thread_pool constructors/destructor -15.43.2. thread_pool members -15.44. Class thread_pool::executor_type -15.44.1. thread_pool::executor_type constructors -15.44.2. thread_pool::executor_type assignment -15.44.3. thread_pool::executor_type operations -15.44.4. thread_pool::executor_type comparisons -15.45. Header synopsis -15.46. Class loop_scheduler -15.46.1. loop_scheduler constructors/destructor -15.46.2. loop_scheduler members -15.47. Class loop_scheduler::executor_type -15.47.1. loop_scheduler::executor_type constructors -15.47.2. loop_scheduler::executor_type assignment -15.47.3. loop_scheduler::executor_type operations -15.47.4. loop_scheduler::executor_type comparisons -15.1. Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct handler_type; - - template - using handler_type_t = - typename handler_type::type; - - template class async_result; - - template - struct async_completion; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.2. 
Class template handler_type -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct handler_type - { - typedef see below type; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Template parameter CompletionToken specifies the model used to obtain the result of the asynchronous operation. Template parameter Signature is the call signature (C++ Std, [func.def]) for the handler type invoked on completion of the asynchronous operation. - -A program may specialize this trait if the CompletionToken template parameter in the specialization is a user-defined type. - -Specializations of handler_type shall define a nested handler type type that satisfies the MoveConstructible requirements, and objects of type type shall be constructible from an lvalue or rvalue of the type specified by the CompletionToken template parameter. - -15.2.1. handler_type members -typedef see below type; -Type: CompletionToken if CompletionToken and decay_t are the same type; otherwise, handler_type_t, Signature>. - -15.3. Class template async_result -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class async_result - { - public: - typedef void type; - - explicit async_result(Handler&); - async_result(const async_result&) = delete; - async_result& operator=(const async_result&) = delete; - - type get(); - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Template argument Handler is a handler type produced by handler_type_t for some completion token type T and call signature S. - -A program may specialize this template if the Handler template parameter in the specialization is a user-defined type. - -Specializations of async_result shall satisfy the Destructible requirements (C++ Std, [destructible]) in addition to the requirements in the table below. 
In this table, R is a specialization of async_result for the template parameter Handler; r is a modifiable lvalue of type R; and h is a modifiable lvalue of type Handler. - -Table 1. async_result specialization requirements - -Expression - -Return type - -Note - -R::type - -void; or a type satisfying MoveConstructible requirements (C++ Std, [moveconstructible]) - -R r(h); - -r.get() - -R::type - -The get() member function shall be used only as a return expression. - - -15.3.1. async_result members -explicit async_result(Handler&); -Effects: Does nothing. - -type get(); -Effects: Does nothing. - -15.4. Class template async_completion -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct async_completion - { - typedef handler_type_t handler_type; - - explicit async_completion(remove_reference_t& t); - async_completion(const async_completion&) = delete; - async_completion& operator=(const async_completion&) = delete; - - see below handler; - async_result result; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Template parameter CompletionToken specifies the model used to obtain the result of the asynchronous operation. Template parameter Signature is the call signature (C++ Std, [func.def]) for the handler type invoked on completion of the asynchronous operation. - -15.4.1. async_completion members -explicit async_completion(remove_reference_t& t); -Effects: If CompletionToken and handler_type are the same type, binds handler to t; otherwise, initializes handler with the result of forward(t). Initializes result with handler. - -see below handler; -Type: handler_type& if CompletionToken and handler_type are the same type; otherwise, handler_type. - -15.5. 
Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template> - struct associated_allocator; - - template> - using associated_allocator_t = typename associated_allocator::type; - - // get_associated_allocator: - - template - associated_allocator_t get_associated_allocator(const T& t); - template - associated_allocator_t - get_associated_allocator(const T& t, const Alloc& a); - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.6. Class template associated_allocator -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template> - struct associated_allocator - { - typedef see below type; - - static type get(const T& t, const Alloc& a = Alloc()) noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -A program may specialize this traits type if the T template parameter in the specialization is a user-defined type. The template parameter Alloc shall be a type meeting Allocator requirements (C++ Std, [allocator.requirements]). - -Specializations of associated_allocator shall satisfy the requirements in the table below. In this table, X is a specialization of associated_allocator for the template parameter T; t is a const reference to an object of type T; and a is an object of type Alloc. - -Table 2. associated_allocator specialization requirements - -Expression - -Return type - -Note - -typename X::type - -A type meeting Allocator requirements (C++ Std, [allocator.requirements]). - -X::get(t) - -X::type - -Shall not exit via an exception. -Equivalent to X::get(t, Alloc()). - -X::get(t, a) - -X::type - -Shall not exit via an exception. - - -15.6.1. associated_allocator members -typedef see below type; -Type: If T has a nested type allocator_type, typename T::allocator_type. Otherwise Alloc. 
- -type get(const T& t, const Alloc& a = Alloc()) noexcept; -Returns: If T has a nested type allocator_type, t.get_allocator(). Otherwise a. - -15.7. Function get_associated_allocator -template - associated_allocator_t get_associated_allocator(const T& t); -Returns: associated_allocator::get(t). - -template - associated_allocator_t - get_associated_allocator(const T& t, const Alloc& a); -Returns: associated_allocator::get(t, a). - -15.8. Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - enum class fork_event { - prepare, - parent, - child - }; - - class execution_context; - - class service_already_exists; - - template Service& use_service(execution_context& ctx); - template Service& - make_service(execution_context& ctx, Args&&... args); - template bool has_service(execution_context& ctx) noexcept; - - template struct is_executor : false_type {}; - - struct executor_arg_t { }; - constexpr executor_arg_t executor_arg = executor_arg_t(); - - template struct uses_executor; - - template - struct associated_executor; - - template - using associated_executor_t = typename associated_executor::type; - - // get_associated_executor: - - template - associated_executor_t get_associated_executor(const T& t); - template - associated_executor_t - get_associated_executor(const T& t, const Executor& ex); - template - associated_executor_t - get_associated_executor(const T& t, ExecutionContext& ctx); - - template struct executor_wrapper; - - template - struct handler_type, Signature>; - - template - class async_result>; - - template - struct associated_allocator, Allocator>; - - template - struct associated_executor, Executor1>; - - // wrap: - - template - executor_wrapper, Executor> wrap(const Executor& ex, T&& t); - template - executor_wrapper, typename ExecutionContext::executor_type> - wrap(ExecutionContext& ctx, T&& t); - - template struct executor_work; - - // make_work: - - template - executor_work - make_work(const Executor& 
ex); - template - executor_work - make_work(ExecutionContext& ctx); - template - executor_work> - make_work(const T& t); - template - executor_work> - make_work(const T& t, const Executor& ex); - template - executor_work> - make_work(const T& t, ExecutionContext& ctx); - - class system_executor; - - bool operator==(const system_executor&, const system_executor&); - bool operator!=(const system_executor&, const system_executor&); - - template<> struct is_executor : true_type {}; - - class bad_executor; - - class executor; - - template <> struct is_executor : true_type {}; - - bool operator==(const executor& a, const executor& b) noexcept; - bool operator==(const executor& e, nullptr_t) noexcept; - bool operator==(nullptr_t, const executor& e) noexcept; - bool operator!=(const executor& a, const executor& b) noexcept; - bool operator!=(const executor& e, nullptr_t) noexcept; - bool operator!=(nullptr_t, const executor& e) noexcept; - - // dispatch: - - template - auto dispatch(CompletionToken&& token); - template - auto dispatch(const Executor& ex, CompletionToken&& token); - template - auto dispatch(ExecutionContext& ctx, CompletionToken&& token); - - // post: - - template - auto post(CompletionToken&& token); - template - auto post(const Executor& ex, CompletionToken&& token); - template - auto post(ExecutionContext& ctx, CompletionToken&& token); - - // defer: - - template - auto defer(CompletionToken&& token); - template - auto defer(const Executor& ex, CompletionToken&& token); - template - auto defer(ExecutionContext& ctx, CompletionToken&& token); - - } // inline namespace concurrency_v1 - } // namespace experimental - - template - struct uses_allocator - : true_type {}; - -} // namespace std -15.9. Requirements -15.9.1. Executor requirements -The library describes a standard set of requirements for executors. A type meeting Executor requirements shall embody a set of rules for determining how submitted function objects are to be executed. 
- -An executor type X shall satisfy the requirements of CopyConstructible (C++ Std, [copyconstructible]) types. No constructor, comparison operator, copy operation, move operation, swap operation, or member functions context, on_work_started and on_work_finished on these types shall exit via an exception. - -The executor copy constructor, comparison operators, and member functions defined in these requirements shall not introduce data races as a result of concurrent calls to those functions from different threads. - -In the table below, X denotes an executor class, x denotes a value of type X&, x1 and x2 denote values of type const X&, x3 denotes a value of type X&&, f denotes a MoveConstructible (C++ Std, [moveconstructible]) function object callable with zero arguments, a denotes a value of type A meeting Allocator requirements (C++ Std, [allocator.requirements]), t denotes an object of type T, and u denotes an identifier. - -Table 3. Executor requirements - -expression - -type - -assertion/note -pre/post-conditions - -X u(x1); - -Shall not exit via an exception. - -post: u == x1 - -X u(x3); - -Shall not exit via an exception. - -post: u equals the prior value of x3. - -x1 == x2 - -bool - -Shall not exit via an exception. - -Returns true only if x1 and x2 can be interchanged with identical effects in any of the expressions defined in these type requirements. [Note: false does not necessarily imply that the effects are not identical. —end note] - -operator== shall be reflexive, symmetric, and transitive, and shall not exit via an exception. - -x1 != x2 - -bool - -Shall not exit via an exception. - -Same as !(x1 == x2). - -x.context() - -execution_context&, or a type that is convertible to execution_context&. - -Shall not exit via an exception. - -x.on_work_started() - -Shall not exit via an exception. - -x.on_work_finished() - -Shall not exit via an exception. - -Requires: A preceding call x1.on_work_started() where x == x1. 
- -x.dispatch(std::move(f),a) - -Effects: Calls DECAY_COPY(forward(f))() at most once. The executor may invoke f in the current thread, prior to returning from dispatch. The call to DECAY_COPY() is evaluated in the thread that called dispatch. - -Executor implementations are encouraged to use the supplied allocator to allocate any memory required to store the function object. The executor shall deallocate all memory prior to invoking the function object. - -Synchronization: The invocation of dispatch synchronizes with (C++ Std, [intro.multithread]) the invocation of f. - -x.post(std::move(f),a) - -Effects: Calls DECAY_COPY(forward(f))() at most once. The executor may not invoke f in the current thread, prior to returning from post. The call to DECAY_COPY() is evaluated in the thread that called post. - -Executor implementations are encouraged to use the supplied allocator to allocate any memory required to store the function object. The executor shall deallocate all memory prior to invoking the function object. - -Synchronization: The invocation of post synchronizes with (C++ Std, [intro.multithread]) the invocation of f. - -x.defer(std::move(f),a) - -Effects: Calls DECAY_COPY(forward(f))() at most once. The executor may not invoke f in the current thread, prior to returning from defer. The call to DECAY_COPY() is evaluated in the thread that called defer. - -Executor implementations are encouraged to use the supplied allocator to allocate any memory required to store the function object. The executor shall deallocate all memory prior to invoking the function object. - -Synchronization: The invocation of defer synchronizes with (C++ Std, [intro.multithread]) the invocation of f. - -Note: Although the requirements placed on defer are identical to post, a separate function is used to convey the intention of the caller that the submitted function is a continuation of the current call context. 
The executor may use this information to optimize or otherwise adjust the way in which f is invoked. - - -15.9.2. Service requirements -A class is a service if it is publicly derived from another service, or if it is a class publicly derived from execution_context::service. - -A service may contain a publicly-accessible nested typedef named key_type. If the nested typedef key_type exists, the service class shall be the same type as key_type, or otherwise publicly and unambiguously derived from key_type. - -All services define a one-argument constructor that takes a reference to the execution_context object that owns the service. This constructor is explicit, preventing its participation in automatic conversions. - -A service may provide additional constructors with two or more arguments, where the first argument is a reference to the execution_context object that owns the service. [Note: These constructors may be called by the make_service function. —end note] - -[Example: - - -class my_service : public execution_context::service -{ -public: - typedef my_service key_type; - explicit my_service(execution_context& ctx); -private: - virtual void shutdown_service(); - ... -}; - -—end example] - -A service's shutdown_service member function must cause all copies of user-defined function objects that are held by the service to be destroyed. - -15.10. 
Class execution_context -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class execution_context - { - public: - class service; - - // construct / copy / destroy: - - execution_context(); - execution_context(const execution_context&) = delete; - execution_context& operator=(const execution_context&) = delete; - virtual ~execution_context(); - - // execution context operations: - - void notify_fork(fork_event e); - - protected: - - // execution context protected operations: - - void shutdown_context(); - void destroy_context(); - }; - - // service access: - template Service& use_service(execution_context& ctx); - template Service& - make_service(execution_context& ctx, Args&&... args); - template bool has_service(execution_context& ctx) noexcept; - class service_already_exists : public logic_error { ... }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Class execution_context implements an extensible, type-safe, polymorphic set of services, indexed by service type. - -Access to the services of an execution_context is via three function templates, use_service<>, make_service<> and has_service<>. - -In a call to use_service(), the type argument chooses a service, making available all members of the named type. If the service is not present in an execution_context, an object of type Service is created and added to the execution_context. A C++ program can check if an execution_context implements a particular service with the function template has_service(). - -Service objects may be explicitly added to an execution_context using the function template make_service(). If the service is already present, the service_already_exists exception is thrown. - -Once a service reference is obtained from an execution_context object by calling use_service<>, that reference remains usable until a call to destroy_context(). - -15.10.1. 
execution_context constructor -execution_context(); -Effects: Creates an object of class execution_context. - -15.10.2. execution_context destructor -~execution_context(); -Effects: Destroys an object of class execution_context. Performs shutdown_context() followed by destroy_context(). - -15.10.3. execution_context operations -void notify_fork(fork_event e); -Effects: For each service object svc in the set: -— If e == fork_event::prepare, performs svc->notify_fork(e) in reverse order of the beginning of service object lifetime (C++ Std, [basic.life]). -— Otherwise, performs svc->notify_fork(e) in order of the beginning of service object lifetime. - -15.10.4. execution_context protected operations -void shutdown_context(); -Effects: For each service object svc in the execution_context set, in reverse order of the beginning of service object lifetime (C++ Std, [basic.life]), performs svc->shutdown_service(). - -[Note: shutdown_context is an idempotent operation. —end note] - -void destroy_context(); -Effects: Destroys each service object in the execution_context set, in reverse order of the beginning of service object lifetime (C++ Std, [basic.life]). - -[Note: destroy_context is an idempotent operation. —end note] - -15.10.5. execution_context globals -The functions use_service, make_service and has_service shall not introduce data races as a result of concurrent calls to those functions from different threads. - -template Service& use_service(execution_context& ctx); -Let Key be Service::key_type if the nested typedef Service::key_type exists; otherwise, let Key be Service. - -Requires: Service is a service class that is publicly and unambiguously derived from execution_context::service. If the nested typedef Service::key_type exists, Service is the same type as Service::key_type, or Service is publicly and unambiguously derived from Service::key_type, and Service::key_type is publicly and unambiguously derived from execution_context::service. 
- -Effects: If an object of type Key does not already exist in the execution_context set identified by ctx, creates an object of type Service, initializing it with Service(ctx), and adds it to the set. - -Returns: A reference to the corresponding service of ctx. - -Notes: The reference returned remains valid until a call to destroy_context. - -template Service& - make_service(execution_context& ctx, Args&&... args); -Let Key be Service::key_type if the nested typedef Service::key_type exists; otherwise, let Key be Service. - -Requires: Service is a service class that is publicly and unambiguously derived from execution_context::service. If the nested typedef Service::key_type exists, Service is the same type as Service::key_type, or Service is publicly and unambiguously derived from Service::key_type, and Service::key_type is publicly and unambiguously derived from execution_context::service. A service object of type Key does not already exist in the execution_context set identified by ctx. - -Effects: Creates an object of type Service, initializing it with Service(ctx, forward(args)...), and adds it to the execution_context set identified by ctx. - -Throws: service_already_exists if a corresponding service object of type Key is already present in the set. - -Notes: The reference returned remains valid until a call to destroy_context. - -template bool has_service(execution_context& ctx) noexcept; -Let Key be Service::key_type if the nested typedef Service::key_type exists; otherwise, let Key be Service. - -Requires: Service is a service class that is publicly and unambiguously derived from execution_context::service. If the nested typedef Service::key_type exists, Service is the same type as Service::key_type, or Service is publicly and unambiguously derived from Service::key_type, and Service::key_type is publicly and unambiguously derived from execution_context::service. - -Returns: If an object of type Key is present in ctx, true; otherwise, false. - -15.11. 
Class execution_context::service -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class execution_context::service - { - protected: - // construct / copy / destroy: - - service(execution_context& owner); - service(const service&) = delete; - service& operator=(const service&) = delete; - virtual ~service(); - - // service observers: - - execution_context& context() noexcept; - - private: - friend class execution_context; // exposition only - - // service operations: - - virtual void shutdown_service() = 0; - virtual void notify_fork(fork_event e); - - execution_context& context_; // exposition only - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.11.1. execution_context::service constructors -service(execution_context& owner); -Postconditions: &context_ == &owner. - -15.11.2. execution_context::service observers -execution_context& context() noexcept; -Returns: context_. - -15.11.3. execution_context::service operations -void notify_fork(fork_event e); -Effects: Does nothing. - -15.12. Class template is_executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template struct is_executor : false_type {}; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -is_executor can be used to detect executor types satisfying the Executor type requirements. - -Instantiations of the is_executor template shall meet the UnaryTypeTrait requirements (C++ Std, [meta.rqmts]). A program may specialize this template for a user-defined type T to have a BaseCharacteristic of integral_constant with N > 0 to indicate that T should be treated as an executor type. - -15.13. 
Executor argument tag -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - struct executor_arg_t { }; - constexpr executor_arg_t executor_arg = executor_arg_t(); - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The executor_arg_t struct is an empty structure type used as a unique type to disambiguate constructor and function overloading. Specifically, types may have constructors with executor_arg_t as the first argument, immediately followed by an argument of a type that satisfies the Executor requirements. - -15.14. uses_executor -15.14.1. uses_executor trait -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template struct uses_executor; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Remark: Detects whether T has a nested executor_type that is convertible from Executor. Meets the BinaryTypeTrait requirements (C++ Std, [meta.rqmts]). The implementation shall provide a definition that is derived from false_type. A program may specialize this template to derive from true_type for a user-defined type T that does not have a nested executor_type but nonetheless can be constructed with an executor where either: - -— the first argument of a constructor has type executor_type and the second argument has type Executor; or - -— the last argument of a constructor has type Executor. - -15.14.2. 
uses-executor construction -Uses-executor construction with executor Executor refers to the construction of an object obj of type T, using constructor arguments v1, v2, ..., vN of types V1, V2, ..., VN, respectively, and an executor ex of type Executor, according to the following rules: - -— if uses_executor::value is false and is_constructible::value is true, then obj is initialized as obj(v1, v2, ..., vN); - -— otherwise, if uses_executor::value is true and is_constructible::value is true, then obj is initialized as obj(executor_arg, ex, v1, v2, ..., vN); - -— otherwise, if uses_executor::value is true and is_constructible::value is true, then obj is initialized as obj(v1, v2, ..., vN, ex); - -— otherwise, the request for uses-executor construction is ill-formed. [Note: An error will result if uses_executor::value is true but the specific constructor does not take an executor. This definition prevents a silent failure to pass the executor to an element. —end note] - -15.15. Class template associated_executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct associated_executor - { - typedef see below type; - - static type get(const T& t, const Executor& e = Executor()) noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -A program may specialize this traits type if the T template parameter in the specialization is a user-defined type. The template parameter Executor shall be a type meeting Executor requirements. - -Specializations of associated_executor shall satisfy the requirements in the table below. In this table, X is a specialization of associated_executor for the template parameter T; t is a const reference to an object of type T; and e is an object of type Executor. - -Table 4. associated_executor specialization requirements - -Expression - -Return type - -Note - -typename X::type - -A type meeting Executor requirements. 
- -X::get(t) - -X::type - -Shall not exit via an exception. -Equivalent to X::get(t, Executor()). - -X::get(t, e) - -X::type - -Shall not exit via an exception. - - -15.15.1. associated_executor members -typedef see below type; -Type: If T has a nested type executor_type, typename T::executor_type. Otherwise Executor. - -type get(const T& t, const Executor& e = Executor()) noexcept; -Returns: If T has a nested type executor_type, t.get_executor(). Otherwise e. - -15.16. Function get_associated_executor -template - associated_executor_t get_associated_executor(const T& t); -Returns: associated_executor::get(t). - -template - associated_executor_t - get_associated_executor(const T& t, const Executor& ex); -Returns: associated_executor::get(t, ex). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - associated_executor_t - get_associated_executor(const T& t, ExecutionContext& ctx); -Returns: associated_executor::get(t, ctx.get_executor()). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.17. 
Class template executor_wrapper -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class executor_wrapper - { - public: - // types: - - typedef T wrapped_type; - typedef Executor executor_type; - typedef see below result_type; // not always defined - typedef see below argument_type; // not always defined - typedef see below first_argument_type; // not always defined - typedef see below second_argument_type; // not always defined - - // construct / copy / destroy: - - executor_wrapper(T t, const Executor& ex); - executor_wrapper(const executor_wrapper& other) = default; - executor_wrapper(executor_wrapper&& other) = default; - template - executor_wrapper(const executor_wrapper& other); - template - executor_wrapper(executor_wrapper&& other); - template - executor_wrapper(executor_arg_t, const Executor& ex, - const executor_wrapper& other); - template - executor_wrapper(executor_arg_t, const Executor& ex, - executor_wrapper&& other); - - ~executor_wrapper(); - - // executor wrapper access: - - T& unwrap() noexcept; - const T& unwrap() const noexcept; - executor_type get_executor() const noexcept; - - // executor wrapper invocation: - - template - result_of_t operator()(Args&&... args) cv; - - private: - Executor ex_; // exposition only - T wrapped_; // exposition only - }; - - template - struct handler_type, Signature> - { - typedef executor_wrapper, Executor> type; - }; - - template - class async_result>; - - template - struct associated_allocator, Alloc>; - - template - struct associated_executor, Executor1>; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -executor_wrapper is a wrapper around an object or function of type T, and an executor object of type Executor satisfying Executor requirements. - -executor_wrapper has a weak result type (C++ Std, [func.require]). If T is a function type, result_type shall be a synonym for the return type of T. 
- -The template instantiation executor_wrapper shall define a nested type named argument_type as a synonym for T1 only if the type T is any of the following: - -— a function type or a pointer to function type taking one argument of type T1 - -— a pointer to member function R T0::f cv (where cv represents the member function’s cv-qualifiers); the type T1 is cv T0* - -— a class type with a member type argument_type; the type T1 is T::argument_type. - -The template instantiation executor_wrapper shall define two nested types named first_argument_type and second_argument_type as synonyms for T1 and T2, respectively, only if the type T is any of the following: - -— a function type or a pointer to function type taking two arguments of types T1 and T2 - -— a pointer to member function R T0::f(T2) cv (where cv represents the member function’s cv-qualifiers); the type T1 is cv T0* - -— a class type with member types first_argument_type and second_argument_type; the type T1 is T::first_argument_type. and the type T2 is T::second_argument_type. - -15.17.1. executor_wrapper constructors -executor_wrapper(T t, const Executor& ex); -Effects: Constructs an object of type executor_wrapper. Initializes ex_ with the value ex. If uses_executor::value is true, performs uses-executor construction to initialize wrapped_ with wrapped_(executor_arg, ex_, std::move(t)); otherwise, initializes wrapped_ with wrapped_(std::move(t)). - -template - executor_wrapper(const executor_wrapper& other); -Requires: U is T or convertible to T. OtherExecutor is Executor or convertible to Executor. - -Effects: Constructs an object of type executor_wrapper. Initializes ex_ with other.get_executor(). If uses_executor::value is true, performs uses-executor construction to initialize wrapped_ with wrapped_(executor_arg, ex_, other.unwrap()); otherwise, initializes wrapped_ with wrapped_(other.unwrap()). - -template - executor_wrapper(executor_wrapper&& other); -Requires: U is T or convertible to T. 
OtherExecutor is Executor or convertible to Executor. - -Effects: Constructs an object of type executor_wrapper. Initializes ex_ with other.get_executor(). If uses_executor::value is true, performs uses-executor construction to initialize wrapped_ with wrapped_(executor_arg, ex_, std::move(other.unwrap())); otherwise, initializes wrapped_ with wrapped_(std::move(other.unwrap())). - -template - executor_wrapper(executor_arg_t, const Executor& ex, - const executor_wrapper& other); -Requires: U is T or convertible to T. - -Effects: Constructs an object of type executor_wrapper. Initializes ex_ with ex. If uses_executor::value is true, performs uses-executor construction to initialize wrapped_ with wrapped_(executor_arg, ex_, other.unwrap()); otherwise, initializes wrapped_ with wrapped_(other.unwrap()). - -template - executor_wrapper(executor_arg_t, const Executor& ex, - executor_wrapper&& other); -Requires: U is T or convertible to T. - -Effects: Constructs an object of type executor_wrapper. Initializes ex_ with ex. If uses_executor::value is true, performs uses-executor construction to initialize wrapped_ with wrapped_(executor_arg, ex_, std::move(other.unwrap())); otherwise, initializes wrapped_ with wrapped_(std::move(other.unwrap())). - -15.17.2. executor_wrapper access -T& unwrap() noexcept; -const T& unwrap() const noexcept; -Returns: wrapped_. - -executor_type get_executor() const noexcept; -Returns: executor_. - -15.17.3. executor_wrapper invocation -template - result_of_t operator()(Args&&... args) cv; -Returns: INVOKE(unwrap(), forward(args)...) (C++ Std, [func.require]). - -Remarks: operator() is described for exposition only. Implementations are not required to provide an actual executor_wrapper::operator(). Implementations are permitted to support executor_wrapper function invocation through multiple overloaded operators or through other means. - -15.17.4. 
Class template specialization async_result -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class async_result> - { - public: - typedef typename async_result::type type; - - explicit async_result(executor_wrapper& wrapper); - async_result(const async_result&) = delete; - async_result& operator=(const async_result&) = delete; - - type get(); - - private: - async_result wrapped_; // exposition only - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The implementation shall provide a specialization of async_result that meets the async_result specialization requirements. - -explicit async_result(executor_wrapper& wrapper); -Effects: Initializes wrapped_ with wrapped_(wrapper.unwrap()). - -type get(); -Returns: wrapped_.get(). - -15.17.5. Class template specialization associated_allocator -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct associated_allocator, Alloc> - { - typedef associated_allocator_t type; - - static type get(const executor_wrapper& w, - const Alloc& a = Alloc()) noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The implementation shall provide a specialization of associated_allocator that meets the associated_allocator specialization requirements. - -static type get(const executor_wrapper& w, - const Alloc& a = Alloc()) noexcept; -Returns: associated_allocator::get(w.unwrap(), a). - -15.17.6. 
Class template specialization associated_executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - struct associated_executor, Executor1> - { - typedef Executor type; - - static type get(const executor_wrapper& w, - const Executor1& e = Executor1()) noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The implementation shall provide a specialization of associated_executor that meets the associated_executor specialization requirements. - -static type get(const executor_wrapper& w, - const Executor1& e = Executor1()) noexcept; -Returns: w.get_executor(). - -15.18. Function wrap -template - executor_wrapper, Executor> wrap(const Executor& ex, T&& t); -Returns: executor_wrapper, Executor>(forward(t), ex). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - executor_wrapper, typename ExecutionContext::executor_type> - wrap(ExecutionContext& ctx, T&& t); -Returns: executor_wrapper, typename ExecutionContext::executor_type>(forward(t), ctx.get_executor()). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.19. 
Class template executor_work -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class executor_work - { - public: - // types: - - typedef Executor executor_type; - - // construct / copy / destroy: - - explicit executor_work(const executor_type& ex) noexcept; - executor_work(const executor_work& other) noexcept; - executor_work(executor_work&& other) noexcept; - - executor_work operator=(const executor_type&) = delete; - - ~executor_work(); - - // executor work observers: - - executor_type get_executor() const noexcept; - bool owns_work() const noexcept; - - // executor work modifiers: - - void reset() noexcept; - - private: - Executor ex_; // exposition only - bool owns_; // exposition only - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.19.1. executor_work constructors -explicit executor_work(const executor_type& ex) noexcept; -Effects: Constructs an object of class executor_work, initializing ex_ with ex, and then performing ex_.on_work_started(). - -Postconditions: ex == ex_ and owns_ == true. - -executor_work(const executor_work& other) noexcept; -Effects: Constructs an object of class executor_work, initializing ex_ with other.ex. If other.owns_ == true, performs ex_.on_work_started(). - -Postconditions: ex == other.ex_ and owns_ == other.owns_. - -executor_work(executor_work&& other) noexcept; -Effects: Constructs an object of class executor_work, initializing ex_ with other.ex and owns_ with other.owns_. - -Postconditions: ex is equal to the prior value of other.ex_, owns_ is equal to the prior value of other.owns_, and other.owns_ == false. - -15.19.2. executor_work destructor -Effects: If owns_ is true, performs ex.on_work_finished(). - -15.19.3. executor_work observers -executor_type get_executor() const noexcept; -Returns: ex_. - -bool owns_work() const noexcept; -Returns: owns_. - -15.19.4. 
executor_work modifiers -void reset() noexcept; -Effects: If owns_ is true, performs ex.on_work_finished(). - -Postconditions: owns_ == false. - -15.20. Function make_work -template - executor_work - make_work(const Executor& ex); -Returns: executor_work(ex). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - executor_work - make_work(ExecutionContext& ctx); -Returns: An object of type executor_work initialized with the result of ctx.get_executor(). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -template - executor_work> - make_work(const T& t); -Returns: An object of type executor_work> initialized with the result of associated_executor::get(t). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is false and is_convertible::value is false. - -template - executor_work> - make_work(const T& t, const Executor& ex); -Returns: An object of type executor_work> initialized with the result of associated_executor::get(t, ex). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - executor_work> - make_work(const T& t, ExecutionContext& ctx); -Returns: An object of type executor_work> initialized with the result of associated_executor::get(t, ctx.get_executor()). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.21. 
Class system_executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class system_executor - { - public: - - // executor operations: - - execution_context& context() noexcept; - - void on_work_started() noexcept; - void on_work_finished() noexcept; - - template - void dispatch(Func&& f, const Alloc& a); - template - void post(Func&& f, const Alloc& a); - template - void defer(Func&& f, const Alloc& a); - }; - - bool operator==(const system_executor&, const system_executor&) noexcept; - bool operator!=(const system_executor&, const system_executor&) noexcept; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Class system_executor is a DefaultConstructible type (C++ Std, [defaultconstructible]) satisfying Executor requirements. It represents a set of rules where function objects are permitted to execute on any thread. - -To satisfy the executor requirements for the post and defer member functions, the system executor may allocate thread objects to run the submitted function objects. If std::exit is called, and there remain unexecuted functions objects that have been submitted using post or defer, the implementation shall discard these function objects without calling them. - -15.21.1. system_executor operations -execution_context& context() noexcept; -Returns: A reference to a static-duration object of a type derived from execution_context. - -void on_work_started() noexcept; -Effects: Does nothing. - -void on_work_finished() noexcept; -Effects: Does nothing. - -template - void dispatch(Func&& f, const Alloc& a); -Effects: Calls DECAY_COPY(forward(f))(). - -template - void post(Func&& f, const Alloc& a); -Effects: Calls DECAY_COPY(forward(f))() as if in a thread of execution represented by a thread object, with the call to DECAY_COPY() being evaluated in the thread that called post. 
Any exception propagated from the execution of DECAY_COPY(forward(f))() shall result in a call to std::terminate. - -template - void defer(Func&& f, const Alloc& a); -Effects: Calls DECAY_COPY(forward(f))() as if in a thread of execution represented by a thread object, with the call to DECAY_COPY() being evaluated in the thread that called defer. Any exception propagated from the execution of DECAY_COPY(forward(f))() shall result in a call to std::terminate. - -15.21.2. system_executor comparisons -bool operator==(const system_executor&, const system_executor&) noexcept; -Returns: true. - -bool operator!=(const system_executor&, const system_executor&) noexcept; -Returns: false. - -15.22. Class bad_executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class bad_executor : public exception - { - public: - // constructor: - bad_executor() noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -An exception of type bad_executor is thrown by executor member functions dispatch, post and defer when the executor object has no target. - -15.22.1. bad_executor constructor -bad_executor() noexcept; -Effects: Constructs a bad_executor object. - -15.23. 
Class executor -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class executor - { - public: - // construct / copy / destroy: - - executor() noexcept; - executor(nullptr_t) noexcept; - executor(const executor& e) noexcept; - executor(executor&& e) noexcept; - template executor(Executor e); - template - executor(allocator_arg_t, const Alloc& a, Executor e); - - executor& operator=(const executor& e) noexcept; - executor& operator=(executor&& e) noexcept; - executor& operator=(nullptr_t) noexcept; - template executor& operator=(Executor e); - - ~executor(); - - // executor modifiers: - - void swap(executor& other) noexcept; - template - void assign(Executor e, const Alloc& e); - - // executor operations: - - execution_context& context() noexcept; - - void on_work_started() noexcept; - void on_work_finished() noexcept; - - template - void dispatch(Func&& f, const Alloc& a); - template - void post(Func&& f, const Alloc& a); - template - void defer(Func&& f, const Alloc& a); - - // executor capacity: - - explicit operator bool() const noexcept; - - // executor target access: - - const type_info& target_type() const noexcept; - template Executor* target() noexcept; - template const Executor* target() const noexcept; - }; - - template<> struct is_executor : true_type {}; - - // executor comparisons: - - bool operator==(const executor& a, const executor& b) noexcept; - bool operator==(const executor& e, nullptr_t) noexcept; - bool operator==(nullptr_t, const executor& e) noexcept; - bool operator!=(const executor& a, const executor& b) noexcept; - bool operator!=(const executor& e, nullptr_t) noexcept; - bool operator!=(nullptr_t, const executor& e) noexcept; - - // executor specialized algorithms: - - void swap(executor& a, executor& b) noexcept; - - } // inline namespace concurrency_v1 - } // namespace experimental - - template - struct uses_allocator - : true_type {}; - -} // namespace std -The executor class provides a polymorphic 
wrapper for types that satisfy the Executor requirements. The target object is the executor object that is held by the wrapper. The executor type itself meets the requirements for an Executor. - -[Note: To meet the noexcept requirements for executor copy constructors and move constructors, implementations may share a target between two or more executor objects. —end note] - -15.23.1. executor constructors -executor() noexcept; -Postconditions: !*this. - -executor(nullptr_t) noexcept; -Postconditions: !*this. - -executor(const executor& e) noexcept; -Postconditions: !*this if !e; otherwise, *this targets e.target() or a copy of e.target(). - -executor(executor&& e) noexcept; -Effects: If !e, *this has no target; otherwise, moves e.target() or move-constructs the target of e into the target of *this, leaving e in a valid state with an unspecified value. - -template executor(Executor e); -Requires: Executor shall satisfy the Executor requirements. - -Effects: *this targets a copy of e initialized with std::move(e). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - executor(allocator_arg_t, const Alloc& a, Executor e); -Requires: Executor shall satisfy the Executor requirements. Allocator conforms to the Allocator requirements (C++ Std, [allocator.requirements]). - -Effects: *this targets a copy of e initialized with std::move(e). - -A copy of the allocator argument is used to allocate memory, if necessary, for the internal data structures of the constructed executor object. - -15.23.2. executor assignment -executor& operator=(const executor& e) noexcept; -Effects: executor(e).swap(*this). - -Returns: *this. - -executor& operator=(executor&& e) noexcept; -Effects: Replaces the target of *this with the target of e, leaving e in a valid state with an unspecified value. - -Returns: *this. - -executor& operator=(nullptr_t) noexcept; -Effects: executor(nullptr).swap(*this). - -Returns: *this. 
- -template executor& operator=(Executor e); -Effects: executor(std::move(e)).swap(*this). - -Returns: *this. - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -15.23.3. executor destructor -~executor(); -Effects: If *this != nullptr, releases shared ownership of, or destroys, the target of *this. - -15.23.4. executor modifiers -void swap(executor& other) noexcept; -Effects: Interchanges the targets of *this and other. - -template - void assign(Executor e, const Alloc& e); -Effects: executor(allocator_arg, a, std::move(e)).swap(*this). - -15.23.5. executor operations -execution_context& context() noexcept; -Returns: e.context(), where e is the target object of *this. - -void on_work_started() noexcept; -Effects: e.on_work_started(), where e is the target object of *this. - -void on_work_finished() noexcept; -Effects: e.on_work_finished(), where e is the target object of *this. - -template - void dispatch(Func&& f, const Alloc& a); -Effects: e.dispatch(g, a), where e is the target object of *this, and g is a function object of unspecified type that, when called as g(), performs DECAY_COPY(f)(). - -template - void post(Func&& f, const Alloc& a); -Effects: e.post(g, a), where e is the target object of *this, and g is a function object of unspecified type that, when called as g(), performs DECAY_COPY(f)(). - -template - void defer(Func&& f, const Alloc& a); -Effects: e.defer(g, a), where e is the target object of *this, and g is a function object of unspecified type that, when called as g(), performs DECAY_COPY(f)(). - -15.23.6. executor capacity -explicit operator bool() const noexcept; -Returns: true if *this has a target, otherwise false, - -15.23.7. executor target access -const type_info& target_type() const noexcept; -Returns: If *this has a target of type T, typeid(T); otherwise, typeid(void). 
- -template Executor* target() noexcept; -template const Executor* target() const noexcept; -Requires: Executor shall satisfy the Executor requirements. - -Returns: If target_type() == typeid(Executor) a pointer to the stored executor target; otherwise a null pointer. - -15.23.8. executor comparisons -bool operator==(const executor& a, const executor& b) noexcept; -Returns: -— true if !a and !b; -— true if a and b share a target; -— true if e and f are the same type and e == f, where e is the target object of a and f is the target object of b; -— otherwise false. - -bool operator==(const executor& e, nullptr_t) noexcept; -bool operator==(nullptr_t, const executor& e) noexcept; -Returns: !e. - -bool operator!=(const executor& a, const executor& b) noexcept; -Returns: !(a == b). - -bool operator!=(const executor& e, nullptr_t) noexcept; -bool operator!=(nullptr_t, const executor& e) noexcept; -Returns: (bool) e. - -15.23.9. executor specialized algorithms -void swap(executor& a, executor& b) noexcept; -Effects: a.swap(b). - -15.24. Function dispatch -template - auto dispatch(CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Performs ex.dispatch(std::move(handler), alloc). - -Returns: result.get(). 
- -template - auto dispatch(const Executor& ex, CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w.reset(). -— Performs ex.dispatch(std::move(f), alloc). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto dispatch(ExecutionContext& ctx, CompletionToken&& token); -Returns: std::experimental::concurrency_v1::dispatch(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.25. Function post -template - auto post(CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). 
-— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Performs ex.post(std::move(handler), alloc). - -Returns: result.get(). - -template - auto post(const Executor& ex, CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w.reset(). -— Performs ex.post(std::move(f), alloc). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto post(ExecutionContext& ctx, CompletionToken&& token); -Returns: std::experimental::concurrency_v1::post(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.26. Function defer -template - auto defer(CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. 
- -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Performs ex.defer(std::move(handler), alloc). - -Returns: result.get(). - -template - auto defer(const Executor& ex, CompletionToken&& token); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w.reset(). -— Performs ex.defer(std::move(f), alloc). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto defer(ExecutionContext& ctx, CompletionToken&& token); -Returns: std::experimental::concurrency_v1::defer(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.27. 
Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class strand; - - template - bool operator==(const strand& a, const strand& b); - template - bool operator!=(const strand& a, const strand& b); - - template - struct is_executor> : true_type {}; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.28. Class template strand -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class strand - { - public: - // types: - - typedef Executor inner_executor_type; - - // construct / copy / destroy: - - strand(); - explicit strand(Executor ex); - strand(const strand& other); - strand(strand&& other); - template strand(const strand& other); - template strand(strand&& other); - - strand& operator=(const strand& other); - strand& operator=(strand&& other); - template strand& operator=(const strand& other); - template strand& operator=(strand&& other); - - ~strand(); - - // strand operations: - - inner_executor_type get_inner_executor() const noexcept; - - bool running_in_this_thread() const noexcept; - - execution_context& context() noexcept; - - void on_work_started() noexcept; - void on_work_finished() noexcept; - - template - void dispatch(Func&& f, const Alloc& a); - template - void post(Func&& f, const Alloc& a); - template - void defer(Func&& f, const Alloc& a); - - private: - Executor inner_ex_; // exposition only - }; - - bool operator==(const strand& a, const strand& b); - bool operator!=(const strand& a, const strand& b); - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -strand is a wrapper around an object of type Executor satisfying Executor requirements. strand satisfies the Executor requirements. - -A strand provides guarantees of ordering and non-concurrency. 
Given: - -— strand objects s1 and s2 such that s1 == s2 - -— a function object f1 added to the strand s1 using post or defer, or using dispatch when s1.running_in_this_thread() == false - -— a function object f2 added to the strand s2 using post or defer, or using dispatch when s2.running_in_this_thread() == false - -then the implementation shall invoke f1 and f2 such that: - -— the invocation of f1 is not concurrent with the invocation of f2 - -— the invocation of f1 synchronizes with the invocation of f2. - -Furthermore, if the addition of f1 happens before the addition of f2, then the invocation of f1 happens before the invocation of f2. - -The strand copy constructors, comparison operators, and member functions shall not introduce data races as a result of concurrent calls to those functions from different threads. - -If any function f executed by the strand throws an exception, the subsequent strand state shall be as if f had exited without throwing an exception. - -15.28.1. strand constructors -strand(); -Effects: Constructs an object of class strand that represents a unique ordered, non-concurrent state. Initializes inner_ex_ with inner_ex_(). - -Remarks: This overload shall not participate in overload resolution unless Executor satisfies the DefaultConstructible requirements (C++ Std, [defaultconstructible]). - -explicit strand(Executor ex); -Effects: Constructs an object of class strand that represents a unique ordered, non-concurrent state. Initializes inner_ex_ with inner_ex_(ex). - -strand(const strand& other); -Effects: Constructs an object of class strand. Initalizes inner_ex_ with inner_ex_(other.inner_ex_). - -Postconditions: -— *this == other -— get_inner_executor() == other.get_inner_executor() - -strand(strand&& other); -Effects: Constructs an object of class strand. Initalizes inner_ex_ with inner_ex_(std::move(other.inner_ex_)). 
 

Postconditions: 
— *this is equal to the prior value of other 
— get_inner_executor() == other.get_inner_executor() 

template strand(const strand& other); 
Requires: OtherExecutor is convertible to Executor. 

Effects: Constructs an object of class strand. Initializes inner_ex_ with inner_ex_(other.inner_ex_). 

Postconditions: *this == other. 

template strand(strand&& other); 
Requires: OtherExecutor is convertible to Executor. 

Effects: Constructs an object of class strand. Initializes inner_ex_ with inner_ex_(other.inner_ex_). 

Postconditions: *this is equal to the prior value of other. 

15.28.2. strand assignment 
strand& operator=(const strand& other); 
Requires: Executor is Assignable (C++ Std [assignable]). 

Postconditions: 
— *this == other 
— get_inner_executor() == other.get_inner_executor() 

Returns: *this. 

strand& operator=(strand&& other); 
Requires: Executor is Assignable (C++ Std [assignable]). 

Postconditions: 
— *this is equal to the prior value of other 
— get_inner_executor() == other.get_inner_executor() 

Returns: *this. 

template strand& operator=(const strand& other); 
Requires: OtherExecutor is convertible to Executor. Executor is Assignable (C++ Std [assignable]). 

Effects: Equivalent to strand::operator=(Executor(other)). 

Returns: *this. 

template strand& operator=(strand&& other); 
Requires: OtherExecutor is convertible to Executor. Executor is Assignable (C++ Std [assignable]). 

Effects: Equivalent to strand::operator=(Executor(std::move(other))). 

Returns: *this. 

15.28.3. strand destructor 
~strand(); 
Effects: Destroys an object of class strand. Function objects that were added to the strand but have not yet been executed shall still be executed in a way that meets the guarantees of ordering and non-concurrency. 

15.28.4. strand operations 
inner_executor_type get_inner_executor() const noexcept; 
Returns: inner_ex_. 
- -bool running_in_this_thread() const noexcept; -Returns: true if the current thread of execution is invoking a function that was submitted to the strand, or to any other strand object s such that s == *this, using dispatch, post or defer; otherwise false. - -execution_context& context() noexcept; -Returns: inner_ex_.context(). - -void on_work_started() noexcept; -Effects: Calls inner_ex_.on_work_started(). - -void on_work_finished() noexcept; -Effects: Calls inner_ex_.on_work_finished(). - -template - void dispatch(Func&& f, const Alloc& a); -Effects: If running_in_this_thread() is true, calls DECAY_COPY(forward(f))(). Otherwise, requests invocation of f, as if by forwarding the function object f and allocator a to the executor inner_ex_, such that the guarantees of ordering and non-concurrency are met. - -If f exits via an exception, and the execution of f is performed in the current thread and before dispatch returns, the exception shall propagate to the caller of dispatch() - -template - void post(Func&& f, const Alloc& a); -Effects: Requests invocation of f, as if by forwarding the function object f and allocator a to the executor inner_ex_, such that the guarantees of ordering and non-concurrency are met. - -template - void defer(Func&& f, const Alloc& a); -Effects: Requests invocation of f, as if by forwarding the function object f and allocator a to the executor inner_ex_, such that the guarantees of ordering and non-concurrency are met. - -15.28.5. strand comparisons -bool operator==(const strand& a, const strand& b); -Returns: true, if the strand objects share the same ordered, non-concurrent state; otherwise false. - -bool operator!=(const strand& a, const strand& b); -Returns: !(a == b). - -15.29. Header synopsis -#include - -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - // dispatch_at: - - template - auto dispatch_at(const chrono::time_point& abs_time, - CompletionTokens&&... 
tokens); - template - auto dispatch_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); - template - auto dispatch_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - // post_at: - - template - auto post_at(const chrono::time_point& abs_time, - CompletionTokens&&... tokens); - template - auto post_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); - template - auto post_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - // defer_at: - - template - auto defer_at(const chrono::time_point& abs_time, - CompletionTokens&&... tokens); - template - auto defer_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); - template - auto defer_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - // dispatch_after: - - template - auto dispatch_after(const chrono::duration& rel_time, - CompletionTokens&&... token); - template - auto dispatch_after(const chrono::duration& rel_time, - const Executor& ex, CompletionTokens&&... tokens); - template - auto dispatch_after(const chrono::duration& rel_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - // post_after: - - template - auto post_after(const chrono::duration& rel_time, - CompletionTokens&&... token); - template - auto post_after(const chrono::duration& rel_time, - const Executor& ex, CompletionTokens&&... tokens); - template - auto post_after(const chrono::duration& rel_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - // defer_after: - - template - auto defer_after(const chrono::duration& rel_time, - CompletionTokens&&... token); - template - auto defer_after(const chrono::duration& rel_time, - const Executor& ex, CompletionTokens&&... 
tokens); - template - auto defer_after(const chrono::duration& rel_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.30. Function dispatch_at -template - auto dispatch_at(const chrono::time_point& abs_time, - CompletionTokens&&... tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.dispatch(std::move(handler), alloc) followed by w.reset(). - -Returns: result.get(). - -template - auto dispatch_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Creates a work object w by performing make_work(ex). 
-— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w1 by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w1.reset(). -— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.dispatch(std::move(f), alloc) followed by w.reset(). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto dispatch_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::dispatch_at(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.31. Function post_at -template - auto post_at(const chrono::time_point& abs_time, - CompletionTokens&&... tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). 
-— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.post(std::move(handler), alloc) followed by w.reset(). - -Returns: result.get(). - -template - auto post_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Creates a work object w by performing make_work(ex). -— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w1 by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w1.reset(). -— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.post(std::move(f), alloc) followed by w.reset(). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto post_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::post_at(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.32. Function defer_at -template - auto defer_at(const chrono::time_point& abs_time, - CompletionTokens&&... 
tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Obtains the handler's associated executor object ex by performing get_associated_executor(handler). -— Creates a work object w by performing make_work(ex). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.defer(std::move(handler), alloc) followed by w.reset(). - -Returns: result.get(). - -template - auto defer_at(const chrono::time_point& abs_time, - const Executor& ex, CompletionTokens&&... tokens); -Let the type Handler be the handler function object type determined by performing handler_type_t. - -Requires: The type Handler must satisfy the MoveConstructible requirements (C++ Std, [moveconstructible]) and be callable with zero arguments. - -Effects: -— Constructs a function object handler of type Handler, initialized with handler(forward(token)). -— Constructs an object result of type async_result, initializing the object as result(handler). -— Creates a work object w by performing make_work(ex). -— Obtains the handler's associated executor object ex1 by performing get_associated_executor(handler). -— Creates a work object w1 by performing make_work(ex1). -— Obtains the handler's associated allocator object alloc by performing get_associated_allocator(handler). -— Constructs a function object f with a function call operator that performs ex1.dispatch(std::move(handler), alloc) followed by w1.reset(). 
-— Without blocking the current thread of execution, on expiration of the absolute timeout specified by abs_time performs ex.defer(std::move(f), alloc) followed by w.reset(). - -Returns: result.get(). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto defer_at(const chrono::time_point& abs_time, - ExecutionContext& ctx, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::defer_at(ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.33. Function dispatch_after -template - auto dispatch_after(const chrono::duration& rel_time, - CompletionTokens&&... token); -Returns: std::experimental::concurrency_v1::dispatch_at(chrono::steady_clock::now() + rel_time, forward(token)). - -template - auto dispatch_after(const chrono::duration& rel_time, - const Executor& e, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::dispatch_at(chrono::steady_clock::now() + rel_time, e, forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto dispatch_after(const chrono::duration& rel_time, - ExecutionContext& c, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::dispatch_at(chrono::steady_clock::now() + rel_time, ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.34. Function post_after -template - auto post_after(const chrono::duration& rel_time, - CompletionTokens&&... token); -Returns: std::experimental::concurrency_v1::post_at(chrono::steady_clock::now() + rel_time, forward(token)). - -template - auto post_after(const chrono::duration& rel_time, - const Executor& e, CompletionTokens&&... 
tokens); -Returns: std::experimental::concurrency_v1::post_at(chrono::steady_clock::now() + rel_time, e, forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto post_after(const chrono::duration& rel_time, - ExecutionContext& c, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::post_at(chrono::steady_clock::now() + rel_time, ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.35. Function defer_after -template - auto defer_after(const chrono::duration& rel_time, - CompletionTokens&&... token); -Returns: std::experimental::concurrency_v1::defer_at(chrono::steady_clock::now() + rel_time, forward(token)). - -template - auto defer_after(const chrono::duration& rel_time, - const Executor& e, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::defer_at(chrono::steady_clock::now() + rel_time, e, forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_executor::value is true. - -template - auto defer_after(const chrono::duration& rel_time, - ExecutionContext& c, CompletionTokens&&... tokens); -Returns: std::experimental::concurrency_v1::defer_at(chrono::steady_clock::now() + rel_time, ctx.get_executor(), forward(token)). - -Remarks: This function shall not participate in overload resolution unless is_convertible::value is true. - -15.36. 
Header synopsis -#include - -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template> - class use_future_t; - - constexpr use_future_t<> use_future = use_future_t<>(); - - template - struct handler_type, R(Args...)>; - - template - class async_result>; - - template - class packaged_handler; - - template - class async_result>; - - template> - class packaged_token; - - template - struct handler_type, R(Args...)>; - - template> - packaged_token, Alloc> package(Func&& f, const Alloc& a = Alloc()); - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.37. Class template use_future_t -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template> - class use_future_t - { - public: - // use_future_t types: - typedef Allocator allocator_type; - - // use_future_t members: - constexpr use_future_t() noexcept; - explicit use_future_t(const Allocator& a) noexcept; - template use_future_t - operator[](const OtherAllocator& a) const noexcept; - allocator_type get_allocator() const noexcept; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The class template use_future_t defines a set of completion token types for use with asynchronous operations. - -15.37.1. use_future_t constructors -constexpr use_future_t() noexcept; -Effects: Constructs a use_future_t with default-constructed allocator. - -explicit use_future_t(const Allocator& a) noexcept; -Effects: Constructs an object of type use_future_t with post-condition get_allocator() == a. - -15.37.2. use_future_t members -template use_future_t - operator[](const OtherAllocator& a) const noexcept; -Returns: A use_future_t object where get_allocator() == a. - -allocator_type get_allocator() const noexcept; -Returns: The associated allocator object. - -15.37.3. 
use_future_t traits -template -struct handler_type, R(Args...)> -{ - typedef see below type; -}; -An object t1 of the nested function object type type is an asynchronous provider with an associated shared state (C++Std, [futures.state]). The type type provides type::operator() such that the expression t1(declval()...) is well formed. - -The implementation shall specialize associated_executor for type. For function objects executed using the associated executor's dispatch(), post() or defer() functions, any exception thrown is caught by the executor and stored in the associated shared state. - -The implementation shall specialize async_result for type such that, when an async_result object r1 is constructed from t1, the expression r1.get() returns a future with the same shared state as t1. - -The semantics of async_result::type and type::operator() are defined in the table below. In this table, N is the value of sizeof...(Args); let i be in the range [0..N) and let Ti be the ith type in Args; let Ui be decay::type for each type Ti in Args; and let ai be the ith argument to type::operator(). - -Table 5. handler_type, R(Args...)>::type semantics - -N - -U0 - -async_result::type - -type::operator effects - -0 - -future - -Makes the shared state ready. - -1 - -error_code - -future - -If a0 evaluates to true, atomically stores the exception pointer produced by make_exception_ptr(system_error(a0)) in the shared state. The shared state is made ready. - -1 - -exception_ptr - -future - -If a0 is non-null, atomically stores the exception pointer a0 in the shared state. The shared state is made ready. - -1 - -all other types - -future - -Atomically stores a0 in the shared state and makes that state ready. - -2 - -error_code - -future - -If a0 evaluates to true, atomically stores the exception pointer produced by make_exception_ptr(system_error(a0)) in the shared state; otherwise, atomically stores a1 in the shared state. The shared state is made ready. 
- -2 - -exception_ptr - -future - -If a0 is non-null, atomically stores the exception pointer in the shared state; otherwise, atomically stores a1 in the shared state. The shared state is made ready. - -2 - -all other types - -future> - -Atomically stores the result of make_tuple(a0,a1) in the shared state and makes that state ready. - ->2 - -error_code - -future> - -If a0 evaluates to true, atomically stores the exception pointer produced by make_exception_ptr(system_error(a0)) in the shared state; otherwise, atomically stores the result of make_tuple(a1,...,aN-1) in the shared state. The shared state is made ready. - ->2 - -exception_ptr - -future> - -If a0 is non-null, atomically stores the exception pointer in the shared state; otherwise, atomically stores the result of make_tuple(a1,...,aN-1) in the shared state. The shared state is made ready. - ->2 - -all other types - -future> - -Atomically stores the result of make_tuple(a0,...,aN-1) in the shared state and makes that state ready. - - -15.38. Class template specialization async_result for packaged_task -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class async_result> - { - public: - typedef future type; - - async_result(packaged_task& t); - async_result(const async_result&) = delete; - async_result& operator=(const async_result&) = delete; - - type get(); - - private: - type future_; // exposition only - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The implementation shall provide a specialization of async_result that meets the async_result specialization requirements. - -async_result(packaged_task& t); -Effects: Initializes future_ with t.get_future(). - -type get(); -Returns: std::move(future_). - -15.39. 
Class template packaged_handler -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class packaged_handler - : public packaged_task - { - public: - // packaged_handler types: - - typedef Alloc allocator_type; - - // packaged_handler constructors: - - template - explicit packaged_handler(packaged_token&& token); - - // packaged_handler operations: - - allocator_type get_allocator() const noexcept; - - private: - Alloc allocator_; // exposition only - }; - - template - class async_result>; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.39.1. packaged_handler constructors -template - explicit packaged_handler(packaged_token&& token); -Effects: Constructs an object of class packaged_handler, initializing the base class with packaged_task(std::move(token.f_)) and initializing allocator_ with token.allocator_. - -15.39.2. packaged_handler operations -allocator_type get_allocator() const noexcept; -Returns: allocator_. - -15.39.3. Class template specialization async_result -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template - class async_result> - : public async_result> - { - public: - explicit async_result(packaged_handler& h); - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -The implementation shall provide a specialization of async_result that meets the async_result specialization requirements. - -explicit async_result(packaged_handler& h); -Effects: Initializes the base class with h. - -15.40. 
Class template packaged_token -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - template> - class packaged_token - { - public: - // packaged_token types: - - typedef Alloc allocator_type; - - // packaged_token constructors: - - explicit packaged_token(Func f); - packaged_token(Func f, const Alloc& a); - - // packaged_token operations: - - allocator_type get_allocator() const noexcept; - - private: - Func f_; // exposition only - Alloc allocator_; // exposition only - }; - - template - struct handler_type, R(Args...)> - { - typedef packaged_handler(Args...), Alloc> type; - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.40.1. packaged_token constructors -explicit packaged_token(Func f); -Effects: Constructs an object of class packaged_token, initializing f_ with std::move(f) and default constructing allocator_. - -packaged_token(Func f, const Alloc& a); -Effects: Constructs an object of class packaged_token, initializing f_ with std::move(f) and allocator_ with a. - -15.40.2. packaged_token operations -allocator_type get_allocator() const noexcept; -Returns: allocator_. - -15.41. Function package -template> - packaged_token, Alloc> package(Func&& f, const Alloc& a = Alloc()); -Returns: packaged_token, Alloc>(forward(f), a). - -15.42. Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class thread_pool; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.43. 
Class thread_pool -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class thread_pool : public execution_context - { - public: - // types: - - class executor_type; - - // construct / copy / destroy: - - thread_pool(); - explicit thread_pool(std::size_t num_threads); - thread_pool(const thread_pool&) = delete; - thread_pool& operator=(const thread_pool&) = delete; - ~thread_pool(); - - // thread_pool operations: - - executor_type get_executor() noexcept; - - void stop(); - - void join(); - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -Class thread_pool implements a fixed-size pool of threads. - -An object of type thread_pool has an associated executor object, meeting the Executor type requirements, of type thread_pool::executor_type and obtainable via the thread_pool object's get_executor member function. - -For an object of type thread_pool, outstanding work is defined as the sum of: - -— the total number of calls to the thread_pool executor's on_work_started function, less the total number of calls to the on_work_finished function; - -— the number of function objects that have been added to the thread_pool via the thread_pool executor, but not yet executed; and - -— the number of function objects that are currently being executed by the thread_pool. - -The thread_pool member functions get_executor, stop, and join, and the thread_pool::executor_type copy constructors, member functions and comparison operators, shall not introduce data races as a result of concurrent calls to those functions from different threads. - -15.43.1. thread_pool constructors/destructor -thread_pool(); -explicit thread_pool(std::size_t num_threads); -Effects: Creates an object of class thread_pool containing a number of threads of execution, each represented by a thread object. If specified, the number of threads in the pool is num_threads. 
Otherwise, the number of threads in the pool is implementation-defined. [Note: A suggested value for the implementation-defined number of threads is std::thread::hardware_concurrency() * 2. —end note] - -~thread_pool(); -Effects: Destroys an object of class thread_pool. Performs stop() followed by join(). - -15.43.2. thread_pool members -executor_type get_executor() noexcept; -Returns: An executor that may be used for submitting function objects to the thread_pool. - -void stop(); -Effects: Signals the threads in the pool to complete as soon as possible. If a thread is currently executing a function object, the thread will exit only after completion of that function object. The call to stop() returns without waiting for the threads to complete. - -void join(); -Effects: If not already stopped, signals the threads in the pool to exit once the outstanding work is 0. Blocks until all threads in the pool have completed. - -Synchronization: The completion of each thread in the pool synchronizes with (C++ Std, [intro.multithread]) the corresponding successful join() return. - -Postconditions: The threads in the pool represented by *this have completed. - -15.44. 
Class thread_pool::executor_type -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class thread_pool::executor_type - { - public: - // construct / copy / destroy: - - executor_type(const executor_type& other) noexcept; - executor_type(executor_type&& other) noexcept; - - executor_type& operator=(const executor_type& other) noexcept; - executor_type& operator=(executor_type&& other) noexcept; - - ~executor_type(); - - // executor operations: - - bool running_in_this_thread() const noexcept; - - thread_pool& context() noexcept; - - void on_work_started() noexcept; - void on_work_finished() noexcept; - - template - void dispatch(Func&& f, const Alloc& a); - template - void post(Func&& f, const Alloc& a); - template - void defer(Func&& f, const Alloc& a); - }; - - bool operator==(const thread_pool::executor_type& a, - const thread_pool::executor_type& b) noexcept; - bool operator!=(const thread_pool::executor_type& a, - const thread_pool::executor_type& b) noexcept; - - template<> struct is_executor : true_type {}; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -thread_pool::executor_type is a type satisfying Executor requirements. Objects of type thread_pool::executor_type are associated with a thread_pool, and function objects submitted using the dispatch, post or defer member functions will be executed by the thread_pool. - -15.44.1. thread_pool::executor_type constructors -executor_type(const executor_type& other) noexcept; -Effects: Constructs an object of class thread_pool::executor_type. - -Postconditions: *this == other. - -executor_type(executor_type&& other) noexcept; -Effects: Constructs an object of class thread_pool::executor_type. - -Postconditions: *this is equal to the prior value of other. - -15.44.2. thread_pool::executor_type assignment -executor_type& operator=(const executor_type& other) noexcept; -Postconditions: *this == other. - -Returns: *this. 
- -executor_type& operator=(executor_type&& other) noexcept; -Postconditions: *this is equal to the prior value of other. - -Returns: *this. - -15.44.3. thread_pool::executor_type operations -bool running_in_this_thread() const noexcept; -Returns: true if the current thread of execution is a member of the pool; otherwise false. - -thread_pool& context() noexcept; -Returns: A reference to the associated thread_pool object. - -void on_work_started() noexcept; -Effects: Increases the count of outstanding work associated with the thread_pool. - -void on_work_finished() noexcept; -Effects: Decreases the count of outstanding work associated with the thread_pool. - -template - void dispatch(Func&& f, const Alloc& a); -Effects: If running_in_this_thread() is true, calls DECAY_COPY(forward(f))(). Otherwise, requests execution of f. - -If f exits via an exception, and the execution of f is performed in the current thread and before dispatch returns, the exception shall propagate to the caller of dispatch. - -template - void post(Func&& f, const Alloc& a); -Effects: Requests execution of f. - -template - void defer(Func&& f, const Alloc& a); -Effects: Requests execution of f. - -15.44.4. thread_pool::executor_type comparisons -bool operator==(const thread_pool::executor_type& a, - const thread_pool::executor_type& b) noexcept; -Returns: &a.context() == &b.context(). - -bool operator!=(const thread_pool::executor_type& a, - const thread_pool::executor_type& b) noexcept; -Returns: !(a == b). - -15.45. Header synopsis -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class loop_scheduler; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -15.46. 
Class loop_scheduler -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class loop_scheduler : public execution_context - { - public: - // types: - - class executor_type; - - // construct / copy / destroy: - - loop_scheduler(); - explicit loop_scheduler(std::size_t concurrency_hint); - loop_scheduler(const loop_scheduler&) = delete; - loop_scheduler& operator=(const loop_scheduler&) = delete; - ~loop_scheduler(); - - // loop_scheduler operations: - - executor_type get_executor() noexcept; - - size_t run(); - template - size_t run_for(const chrono::duration& rel_time); - template - size_t run_until(const chrono::time_point& abs_time); - - size_t run_one(); - template - size_t run_one_for(const chrono::duration& rel_time); - template - size_t run_one_until(const chrono::time_point& abs_time); - - size_t poll(); - - size_t poll_one(); - - void stop(); - - bool stopped() const; - - void restart(); - }; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -An object of type loop_scheduler has an associated executor object, meeting the Executor type requirements, of type loop_scheduler::executor_type and obtainable via the loop_scheduler object's get_executor member function. - -For an object of type loop_scheduler, outstanding work is defined as the sum of: - -— the total number of calls to the loop_scheduler executor's on_work_started function, less the total number of calls to the on_work_finished function; - -— the number of function objects that have been added to the loop_scheduler via the loop_scheduler executor, but not yet executed; and - -— the number of function objects that are currently being executed by the loop_scheduler. - -If at any time the outstanding work falls to 0, the loop_scheduler is stopped as if by stop(). 
- -The loop_scheduler member functions get_executor, run, run_for, run_until, run_one, run_one_for, run_one_until, poll, poll_one, stop, and stopped, and the loop_scheduler::executor_type copy constructors, member functions and comparison operators, shall not introduce data races as a result of concurrent calls to those functions from different threads. [Note: The restart member function is excluded from these thread safety requirements. —end note] - -15.46.1. loop_scheduler constructors/destructor -loop_scheduler(); -explicit loop_scheduler(std::size_t concurrency_hint); -Effects: Creates an object of class loop_scheduler. - -Remarks: The concurrency_hint parameter is a suggestion to the implementation on the number of threads that should process asynchronous operations and execute function objects. - -~loop_scheduler(); -Effects: Destroys an object of class loop_scheduler. - -15.46.2. loop_scheduler members -executor_type get_executor() noexcept; -Returns: An executor that may be used for submitting function objects to the loop_scheduler. - -size_t run(); -Requires: Must not be called from a thread that is currently calling one of run, run_for, run_until, run_one, run_one_for, run_one_until, poll, or poll_one. - -Effects: Equivalent to: - -size_t n = 0; -while (run_one()) - if (n != numeric_limits::max()) - ++n; - -Returns: n. - -template - size_t run_for(const chrono::duration& rel_time); -Effects: Equivalent to: - -return run_until(chrono::steady_clock::now() + rel_time); - -template - size_t run_until(const chrono::time_point& abs_time); -Effects: Equivalent to: - -size_t n = 0; -while (run_one_until(abs_time)) - if (n != numeric_limits::max()) - ++n; - -Returns: n. - -size_t run_one(); -Requires: Must not be called from a thread that is currently calling one of run, run_for, run_until, run_one, run_one_for, run_one_until, poll, or poll_one. - -Effects: If the loop_scheduler object has no oustanding work, performs stop(). 
Otherwise, blocks while the loop_scheduler has outstanding work, or until the loop_scheduler is stopped, or until one function object has been executed. - -If an executed function object throws an exception, the exception shall be allowed to propagate to the caller of run_one(). The loop_scheduler state shall be as if the function object had returned normally. - -Returns: 1 if a function object was executed, otherwise 0. - -Notes: This function may invoke additional handlers through nested calls to the loop_scheduler executor's dispatch member function. These do not count towards the return value. - -template - size_t run_one_for(const chrono::duration& rel_time); -Effects: Equivalent to: - -return run_until(chrono::steady_clock::now() + rel_time); - -Returns: 1 if a function object was executed, otherwise 0. - -template - size_t run_one_until(const chrono::time_point& abs_time); -Effects: If the loop_scheduler object has no oustanding work, performs stop(). Otherwise, blocks while the loop_scheduler has outstanding work, or until the expiration of the absolute timeout (C++ Std, [thread.req.timing]) specified by abs_time, or until the loop_scheduler is stopped, or until one function object has been executed. - -If an executed function object throws an exception, the exception shall be allowed to propagate to the caller of run_one(). The loop_scheduler state shall be as if the function object had returned normally. - -Returns: 1 if a function object was executed, otherwise 0. - -Notes: This function may invoke additional handlers through nested calls to the loop_scheduler executor's dispatch member function. These do not count towards the return value. - -size_t poll(); -Effects: Equivalent to: - -size_t n = 0; -while (poll_one()) - if (n != numeric_limits::max()) - ++n; - -Returns: n. - -size_t poll_one(); -Effects: If the loop_scheduler object has no oustanding work, performs stop(). 
Otherwise, if there is a function object ready for immediate execution, executes it. - -If an executed function object throws an exception, the exception shall be allowed to propagate to the caller of poll_one(). The loop_scheduler state shall be as if the function object had returned normally. - -Returns: 1 if a handler was executed, otherwise 0. - -Notes: This function may invoke additional handlers through nested calls to the loop_scheduler executor's dispatch member function. These do not count towards the return value. - -void stop(); -Effects: Stops the loop_scheduler. Concurrent calls to run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one will end as soon as possible. If a call to run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one is currently executing a function object, the call will end only after completion of that function object. The call to stop() returns without waiting for concurrent calls to run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one to complete. - -Postconditions: stopped() == true. - -[Note: When stopped() == true, subsequent calls to run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one will exit immediately with a return value of 0, without executing any function objects. A loop_scheduler remains in the stopped state until a call to restart(). —end note] - -void restart(); -Postconditions: stopped() == false. - -15.47. 
Class loop_scheduler::executor_type -namespace std { - namespace experimental { - inline namespace concurrency_v1 { - - class loop_scheduler::executor_type - { - public: - // construct / copy / destroy: - - executor_type(const executor_type& other) noexcept; - executor_type(executor_type&& other) noexcept; - - executor_type& operator=(const executor_type& other) noexcept; - executor_type& operator=(executor_type&& other) noexcept; - - ~executor_type(); - - // executor operations: - - bool running_in_this_thread() const noexcept; - - loop_scheduler& context() noexcept; - - void on_work_started() noexcept; - void on_work_finished() noexcept; - - template - void dispatch(Func&& f, const Alloc& a); - template - void post(Func&& f, const Alloc& a); - template - void defer(Func&& f, const Alloc& a); - }; - - bool operator==(const loop_scheduler::executor_type& a, - const loop_scheduler::executor_type& b) noexcept; - bool operator!=(const loop_scheduler::executor_type& a, - const loop_scheduler::executor_type& b) noexcept; - - template<> struct is_executor : true_type {}; - - } // inline namespace concurrency_v1 - } // namespace experimental -} // namespace std -loop_scheduler::executor_type is a type satisfying Executor requirements. Objects of type loop_scheduler::executor_type are associated with a loop_scheduler, and function objects submitted using the dispatch, post or defer member functions will be executed by the loop_scheduler from within the run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one functions. - -15.47.1. loop_scheduler::executor_type constructors -executor_type(const executor_type& other) noexcept; -Effects: Constructs an object of class loop_scheduler::executor_type. - -Postconditions: *this == other. - -executor_type(executor_type&& other) noexcept; -Effects: Constructs an object of class loop_scheduler::executor_type. - -Postconditions: *this is equal to the prior value of other. - -15.47.2. 
loop_scheduler::executor_type assignment -executor_type& operator=(const executor_type& other) noexcept; -Postconditions: *this == other. - -Returns: *this. - -executor_type& operator=(executor_type&& other) noexcept; -Postconditions: *this is equal to the prior value of other. - -Returns: *this. - -15.47.3. loop_scheduler::executor_type operations -bool running_in_this_thread() const noexcept; -Returns: true if the current thread of execution is invoking the run, run_for, run_until, run_one, run_one_for, run_one_until, poll or poll_one function of the associated loop_scheduler object. - -loop_scheduler& context() noexcept; -Returns: A reference to the associated loop_scheduler object. - -void on_work_started() noexcept; -Effects: Increases the count of outstanding work associated with the loop_scheduler. - -void on_work_finished() noexcept; -Effects: Decreases the count of outstanding work associated with the loop_scheduler. - -template - void dispatch(Func&& f, const Alloc& a); -Effects: If running_in_this_thread() is true, calls DECAY_COPY(forward(f))(). Otherwise, requests execution of f. - -If f exits via an exception, and the execution of f is performed in the current thread and before dispatch returns, the exception shall propagate to the caller of dispatch. - -template - void post(Func&& f, const Alloc& a); -Effects: Requests execution of f. - -template - void defer(Func&& f, const Alloc& a); -Effects: Requests execution of f. - -15.47.4. loop_scheduler::executor_type comparisons -bool operator==(const loop_scheduler::executor_type& a, - const loop_scheduler::executor_type& b) noexcept; -Returns: &a.context() == &b.context(). - -bool operator!=(const loop_scheduler::executor_type& a, - const loop_scheduler::executor_type& b) noexcept; -Returns: !(a == b). 
- - -[1] The concept itself is not new, however the term is taken from Holgate, Len, Activatable Object, ACCU Overload Journal #122, August 2014 - -[2] http://theron-library.com - -[3] Unless a small-object optimisation is employed by std::function, but this is not guaranteed. \ No newline at end of file diff --git a/context/research/n4482.md b/context/research/n4482.md deleted file mode 100644 index c192b75..0000000 --- a/context/research/n4482.md +++ /dev/null @@ -1,203 +0,0 @@ -Doc. no: N4482 -Date: 2015-04-13 -Reply-To: Christopher Kohlhoff -Some notes on executors and the Networking Library Proposal -1. Introduction -This document is intended to provide some input to further discussion of executors by: - -— attempting to relate executor requirements, as defined in N4478 Networking Library Proposal, to the terminology described in N4231 Terms and definitions related to threads and N4156 Light-Weight Execution Agents; - -— capturing some aspects of the discussion related to executors from the February 2015 review of the Networking Library Proposal in Cologne; and - -— exploring some of the related prior art. - -2. Executors and progress guarantees -At its most general, one can imagine an executor as something that, when given an arbitrary sequence of instructions, runs them as a thread of execution. In these terms, there are no particular requirements as to whether an executor gives a concurrent, parallel, or weakly parallel progress guarantee. - -However, the executor model utilised in the Networking Library Proposal traffics only in arbitrary “normal” function objects executed on the abstract machine. A “normal” function object will eventually be allowed to execute all steps in its thread of execution, but only after it has executed its first step. That is, this executor model provides a parallel progress guarantee. - -Therefore, let us define the executor requirements of the Networking Library Proposal as parallel function object executor requirements. 
- -2.1. Implementing weaker progress guarantees -Of course, we can still use these executors to implement other abstractions that do provide no more than a weakly parallel progress guarantee. - -For instance, we can implement a scheduler for resumable functions on top of these executors. A given resumable function can have an associated executor, and the work to “resume” the resumable function is performed by an implementation-detail function object running on that executor. Even though that function object is executed with the stronger parallel guarantee, the entire resumable function can assume only weakly parallel progress. - -2.2. Requirements for stronger progress guarantees? -A library or program is allowed to implement the parallel function object executor requirements such that they provide a concurrent progress guarantee. One possible example is a new_thread_executor where every call to dispatch(), post() or defer() creates a new std::thread. - -However, the asynchronous operations framework used by the Networking Library Proposal does not require a concurrent progress guarantee — the parallel guarantee suffices for all uses. It is also worth noting that implied executor requirements of the prior art surveyed below do not require the concurrent guarantee. - -Yet, as N4156 suggests, there are times when a concurrent guarantee is required, and it would be beneficial to be able to detect the ability of an executor to meet this guarantee at compile time. - -Therefore, it may be desirable to define new concurrent function object executor type requirements. These requirements may be a refinement of the parallel function object requirements, or they may be independent. As a straw man proposal, consider use of the function name spawn() to mean launch a function object with a concurrent progress guarantee. An executor may provide this member function if it is able to meet this guarantee. 
- -These hypothetical concurrent function object executor requirements are out-of-scope for a Networking Library Proposal. Were Networking Library executors to impose such a requirement, it would introduce a burden on all implementations of the executor type requirements. Furthermore, for some executor types the concurrent progress requirements are impossible to implement (such as bounded thread pools, and strands or serial executors). - -3. Dispatch, Post and Defer -3.1. Dispatch vs Post -The Networking Library Proposal executor requirements for dispatch() say: - -The executor may invoke f1 prior to returning from dispatch. - -This requirement should be redefined in terms of forward progress. For example, a better definition may be to say that dispatch() is permitted to block the forward progress of the caller until f1() finishes execution. - -The executor requirements for post() (and, in a similar fashion, defer()) say: - -The executor shall not invoke f1 in the current thread of execution prior to returning from post. - -When considered in terms of forward progress this may be an over-specification. A possible redefinition of these requirements may be to say that post() and defer() are not permitted to block the forward progress of the caller pending completion of f1(). - -When post() is defined in this way, it means that in certain very limited cases an implementation of post() may invoke f1 in the calling thread. For example, the type of the function object f1 is "known" to an executor and is "known" to not block. - -3.1.1. Just a hint? -Is the choice between dispatch() and post() better presented as a hint to an executor? - -dispatch() is provided as a tool to minimise the latency and overhead of f1's execution, provided doing so does not violate the rules of the executor. The widespread existence of dispatch() in the prior art attests to its utility. - -However, a consequence of dispatch()'s specification is that its use can introduce deadlock. 
This occurs when the caller holds a mutex and f1 attempts to acquire the same mutex. Thus the choice between dispatch() and post() impacts program correctness. A hint, which by definition need not be respected, is an inappropriate way for the caller to express its intention. - -3.2. Post vs Defer -Where the distinction between dispatch() and post()/defer() is related to the forward progress of the caller, the distinction between post() and defer() relates to the forward progress of f1. The executor requirements say: - -defer is used to convey the intention of the caller that the submitted function is a continuation of the current call context. The executor may use this information to optimize or otherwise adjust the way in which f is invoked. - -A better way to define this may be to say that the use of post() means that we prefer that the caller does not block the first step of f1's progress, whereas defer() means that we prefer that the caller does block the first step of f1. - -3.2.1. Just a hint? -As the difference between post() and defer() uses the word "prefer", there is a case to be made that this is just a hint to the executor. - -It may be more than a hint in circumstances where the caller knows something about the concrete executor being used. For example, on a specific bounded thread pool implementation, a call to post() will allow f1 to exploit the available concurrency in the pool even if the caller continues to execute. (Although, even if a hint is used to distinguish between post() and defer(), this may be addressed by simply specifying that the concrete executor will obey the hint in a certain way.) - -4. Related prior art -In surveying the prior art, we will: - -— Identify whether it provides an equivalent to dispatch. - -— Identify whether it provides an equivalent to post. - -— Identify whether it provides facilities for counting outstanding work, i.e. the equivalent of the on_work_started and on_work_finished functions. 
- -The following table summarises the findings. Where a facility is provided it is marked "y", otherwise "n'. In some cases, the prior art provides a variant of dispatch where it's not just that it "may" block the caller, but that it always blocks the caller. These are marked "y (v)". - -library - -dispatch - -post - -work counting - -Boost.Asio - -y - -y - -y - -Java Executor - -y - -n - -n - -Grand Central Dispatch queues - -y (v) - -y - -n - -.NET SynchronizationContext - -y (v) - -y - -y - -.NET Reactive Extensions Scheduler - -y - -n - -n - -Thread Building Blocks task_arena - -y (v) - -y - -n - -4.1. Boost.Asio -Although the Networking Library Proposal is based on Boost.Asio, the executors facility is a relatively new addition to the library in order to enable move-only completion handlers. In this section we will discuss the original realisation of executor facilities in the library. - -The Boost.Asio io_service class is an executor for function objects that provides a parallel progress guarantee. - -The io_service::dispatch() and io_service::post() functions provide the dispatch and post semantics respectively. - -Work counting is performed via the io_service::work class. Objects of this type automatically count work as they are constructed and destroyed. - -4.2. Java Executor -(http://docs.oracle.com/javase/7/docs/api/java/util/concurrent/Executor.html) - -The Java Executor interface provides only a single method 'execute'. This method has dispatch semantics: - -the Executor interface does not strictly require that execution be asynchronous. In the simplest case, an executor can run the submitted task immediately in the caller's thread. - -4.3. Grand Central Dispatch queues -(https://developer.apple.com/library/ios/documentation/Performance/Reference/GCD_libdispatch_Ref/index.html) - -Grand Central Dispatch provides dispatch queues, which let you execute arbtitrary blocks of code either asynchronously or synchronously with respect to the caller. 
Dispatch queues may be "serial" or "concurrent". - -The dispatch_sync function provides dispatch semantics. However, dispatch_sync always blocks the caller: - -Submits a block object for execution on a dispatch queue and waits until that block completes. ... As an optimization, this function invokes the block on the current thread when possible. - -The dispatch_async function provides post semantics: - -Submits an application-defined function for asynchronous execution on a dispatch queue and returns immediately. - -4.4. .NET SynchronizationContext -(https://msdn.microsoft.com/en-us/library/system.threading.synchronizationcontext(v=vs.110).aspx) - -The .NET SynchronizationContext class is used, amongst other things, to coordinate asynchronous operations in a threaded environment. - -The Send function provides dispatch semantics, however it always blocks the caller: - -The Send method starts a synchronous request to send a message. - -The Post function provides post semantics: - -The Post method starts an asynchronous request to post a message. - -The OperationStarted and OperationCompleted functions provide the work counting facilities. - -4.5. .NET Reactive Extensions Scheduler -(https://msdn.microsoft.com/en-us/library/system.reactive.concurrency.scheduler(v=vs.103).aspx) - -The Scheduler interface provides several overloads of a Schedule function, used to submit actions. - -The existence of a concrete implementation class ImmediateScheduler indicates that the Schedule function provides dispatch semantics: - -Represents an object that schedules units of work to run immediately on the current thread. - -4.6. Thread Building Blocks task_arena -(https://www.threadingbuildingblocks.org/docs/help/reference/task_scheduler/task_arena_cls.htm) - -A task_arena class "represents an internal task scheduler object where a number of threads, limited by a maximal concurrency level, share and execute tasks". 
- -The execute function provides dispatch semantics, however it always blocks the caller: - -If possible, the calling thread joins the arena and executes the specified functor, then leaves the arena ... If not possible to join, the call wraps the functor into a task, enqueues it into the arena, waits using an OS kernel synchronization object for a joining opportunity, and finishes after the task completion. - -The enqueue function provides post semantics: - -Enqueues a task into the arena to process specified functor and immediately returns. \ No newline at end of file diff --git a/context/research/p0145r3.md b/context/research/p0145r3.md deleted file mode 100644 index b11441a..0000000 --- a/context/research/p0145r3.md +++ /dev/null @@ -1,257 +0,0 @@ -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -1 -Refining Expression Evaluation Order for -Idiomatic C++ -Gabriel Dos Reis Herb Sutter Jonathan Caves -Abstract -This paper proposes an order of evaluation of operands in expressions, directly -supporting decades-old established and recommended C++ idioms. The result is the -removal of embarrassing traps for novices and experts alike, increased confidence and -safety of popular programming practices and facilities, hallmarks of modern C++. -1. INTRODUCTION -Order of expression evaluation is a recurring discussion topic in the C++ community. In a nutshell, given -an expression such as f(a, b, c), the order in which the sub-expressions f, a, b, c (which are of arbitrary -shapes) are evaluated is left unspecified by the standard. If any two of these sub-expressions happen to -modify the same object without intervening sequence points, the behavior of the program is undefined. -For instance, the expression f(i++, i) where i is an integer variable leads to undefined behavior, as does -v[i] = i++. Even when the behavior is not undefined, the result of evaluating an expression can still be -anybody’s guess. 
Consider the following program fragment: -#include -int main() { - std::map m; - m[0] = m.size(); // #1 -} -What should the map object m look like after evaluation of the statement marked #1? {{0, 0}} or {{0, 1}}? -1.1. CHANGES FROM PREVIOUS VERSIONS -a. The original version of this proposal (Dos Reis, et al., 2014) received unanimous support from the -Evolution Working Group (EWG) at the Fall 2014 meeting in Urbana, IL, as approved direction, -and also strong support for inclusion in C++17. The most fundamental delta in this revision, -compared to that document, is the inclusion of formal wording for approval into the Working -Draft. -b. Additionally, EWG suggested inclusion of a few more operators. -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -2 -c. We added a couple of sections expanding the rationale behind proposed changes. -d. At the Fall 2015 meeting in Kona, HI, during review by the Core Working Group, some members -of the Core Working Group suggested a variation of the evaluation of function calls as a, separate, -subsidiary proposal. This revision includes two variations for that rule (see section 8.) -e. Explicit use of the phrasing “value computation and side effects associated with” [CWG suggestion -at the Fall 2015 meeting in Kona, HI] -f. Introduced a new “text definition” for “expression X sequenced before expression Y” to replace -the phrase introduced in (e.) [CWG suggestion at the Spring 2016 meeting in Jacksonville, FL] -2. A CORRODING PROBLEM -These questions aren’t for entertainment, or job interview drills, or just for academic interests. The order -of expression evaluation, as it is currently specified in the standard, undermines advices, popular -programming idioms, or the relative safety of standard library facilities. The traps aren’t just for novices -or the careless programmer. They affect all of us indiscriminately, even when we know the rules. 
-Consider the following program fragment: -void f() -{ - std::string s = “but I have heard it works even if you don’t believe in it”; - s.replace(0, 4, “”).replace(s.find(“even”), 4, “only”).replace(s.find(“ don’t”), 6, “”); - assert(s == “I have heard it works only if you believe in it”); -} -The assertion is supposed to validate the programmer’s intended result. It uses “chaining” of member -function calls, a common standard practice. This code has been reviewed by C++ experts world-wide, and -published (The C++ Programming Language, 4th edition.) Yet, its vulnerability to unspecified order of -evaluation has been discovered only recently by a tool. Even if you would like to blame the “excessive” -chaining, remember that expressions of the form std::cout << f() << g() << h() usually result in chaining, -after the overloaded operators have been resolved into function calls. It is the source of endless -headaches. Newer library facilities such as std::future are also vulnerable to this problem, when -considering chaining of the then() member function to specify a sequence of computation. The solution -isn’t to avoid chaining. Rather, it is to fix the problem at the source: refinement of the language rules. -3. WHY NOW? -The current rules have been in effect for more than three decades. So, why change them now? Well, a -programming language is a set of responses to challenges of its time. Many of the existing rules regarding -order of expression evaluation made sense when C was designed and in the constrained environment -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -3 -where C++ was originally designed and implemented. Some of the justifications probably still hold today. -However, a living and evolving programming language cannot just hold onto inertia. -The language should support contemporary idioms. For example, using << as insertion operator into a -stream is now an elementary idiom. So is chaining member function calls. 
The language rules should -guarantee that such idioms aren’t programming hazards. We have library facilities (e.g. std::future) -designed to be used idiomatically with chaining. Without the guarantee that the obvious order of -evaluation for function call and member selection is obeyed, these facilities become traps, source of -obscure, hard to track bugs, facile opportunities for vulnerabilities. -The language should support our programming. The changes suggested below are conservative, -pragmatic, with one overriding guiding principle: effective support for idiomatic C++. In particular, when -choosing between several alternatives, we look for what will provide better support for existing idioms, -what will nurture and sustain new programming techniques. Considerations such as how an expression -is internally elaborated (e.g. function call), while important, are secondary. The primary focus is on what -the programmer reads and writes, in particular in generic codes, not what the compiler internally does -according to fairly arcane rules. By generic codes, we don’t just mean “template codes”. We do also -consider “normal” application codes using common notations for conceptually same operations. For -example, consider the expression ary[idx] = expr, a rule that applies uniformly whether ary is a built-in -(dense) array or an associative (sparse) array increases the set of types that ary can take on, hence -supports generic programming. Observe that operators are generally preferred in C++ generic codes -because they cover larger surface than member functions calls, although versions of the uniform function -call syntax may alleviate that to some extent. Even with uniform function call syntax, we still do not know, -looking at a generic code fragment, whether a particular operator or function will resolve to a member -function or not; consequently, the low-level mechanics (which happen after instantiation) should, ideally, -not be the driving force of the choice. 
Rather, the driver seat should be given to idioms. -4. A SOLUTION -We propose to revise C++ evaluation rules to support decades-old idiomatic constructs and programming -practices. A simple solution would be to require that every expression has a well-defined evaluation -order. That suggestion has traditionally met resistance for various reasons. Rather, this proposalsuggests -a more targeted fix: - Postfix expressions are evaluated from left to right. This includes functions calls and member -selection expressions. - Assignment expressions are evaluated from right to left. This includes compound assignments. - Operands to shift operators are evaluated from left to right. -In summary, the following expressions are evaluated in the order a, then b, then c, then d: -1. a.b -2. a->b -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -4 -3. a->*b -4. a(b1, b2, b3) -5. b @= a -6. a[b] -7. a << b -8. a >> b -Furthermore, we suggest the following additional rule: the order of evaluation of an expression involving -an overloaded operator is determined by the order associated with the corresponding built-in operator, -not the rules for function calls. This rule is to support generic programming and extensive use of -overloaded operators, which are distinctive features of modern C++. -A second, subsidiary proposal replaces the evaluation order of function calls as follows: the function is -evaluated before all its arguments, but any pair of arguments (from the argument list) is indeterminately -sequenced; meaning that one is evaluated before the other but the order is not specified; it is guaranteed -that the function is evaluated before the arguments. This reflects a suggestion made by some members -of the Core Working Group. -5. POSTFIX INCREMENT AND DECREMENT -At the Fall 2014 meeting in Urbana, IL, Clark Nelson observed that the proposal does not suggest when -side effects of postfix increment and postfix decrement are “committed”. 
Indeed, the current proposal -does not suggest any particular modification to the sequencing of unary expressions. The primary reason -is that we have not found a choice that will support an existing widely used programming idiom or nurture -new programming techniques. Consequently, at this point, we do not propose any change to unary -expressions. The side effects of unary expressions shall be committed before the next expression (if any) -is evaluated if it is part of a binary expression or a function call. The sequencing order of unary expressions -is not changed by this proposal. -6. FORMAL WORDING -The following changes are against N4527, the current Working Draft. -6.1. GENERAL (CLAUSE 1) - Add to paragraph 1.9/13 -An expression X is said to be sequenced before an expression Y if every value computation and -every side effect associated with the expression X is sequenced before every value computation -and every side effect associated with the expression Y. - Remove note from 1.9/16 -[ Note: Value computations and side effects associated with different argument expressions are -unsequenced. —end note ] -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -5 -6.2. EXPRESSIONS (CLAUSE 5) - Change paragraph 5/2 as follows: -[ Note: Operators can be overloaded, that is, given meaning when applied to expressions of -class type (Clause 9) or enumeration type (7.2). Uses of overloaded operators are transformed -into function calls as described in 13.5. Overloaded operators obey the rules for syntax and -evaluation order specified in Clause 5, but the requirements of operand type, and value -category, and evaluation order are replaced by the rules for function call. Relations between -operators, such as ++a meaning a+=1, are not guaranteed for overloaded operators (13.5), and -are not guaranteed for operands of type bool. 
—end note ] - Add to paragraph 5.2.1/1: -except that in the case of an array operand, the result is an lvalue if that operand is an lvalue -and an xvalue otherwise.The expression E1 is sequenced before the expression E2. - Modify paragraph 5.2.2/4: -When a function is called, each parameter (8.3.5) shall be initialized (8.5, 12.8, 12.1) with its -corresponding argument. [ Note: Such initializations are indeterminately sequenced with -respect to each other —end note ] If the function is a non-static member function, the this -parameter of the function (9.3.2) shall be initialized with a pointer to the object of the call, -converted as if by an explicit type conversion (5.4). The postfix-expression is sequenced before -each expression in the expression-list and any default argument. Every value computation and -side effect associated with the initialization of a parameter, and the initialization itself, is -sequenced before every value computation and side effect associated with the initialization -of any subsequent parameter. -[Example: -void f() -{ - std::string s = “but I have heard it works even if you don’t believe in it”; - s.replace(0, 4, “”).replace(s.find(“even”), 4, “only”).replace(s.find(“ don’t”), 6, “”); - assert(s == “I have heard it works only if you believe in it”); // OK. -} ---end example] - Remove paragraph 5.2.2/8. -[ Note: The evaluations of the postfix expression and of the arguments are all unsequenced -relative to one another. All side effects of argument evaluations are sequenced before the -function is entered (see 1.9). —end note ] - Modify paragraph 5.3.4/18 as follows -The invocation of the allocation function is indeterminately sequenced with respect to before -the evaluations of the expressions in the new-initializer. Initialization of the allocated object is -sequenced before the value computation in the new-expression. 
It is unspecified whether -expressions in the new-initializer are evaluated if the allocation function returns the null pointer -or exits using an exception. - Remove this phrase from 5.3.4/10 -If any part of the object initialization described above78 terminates by throwing an exception, -storage has been obtained for the object, and a suitable deallocation function can be found, -the deallocation function is called to free the memory in which the object was being constructed, -after which the exception continues to propagate in the context of the new-expression. -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -6 - Append to paragraph 5.5/4 -If the dynamic type of E1 does not contain the member to which E2 refers, the behavior is -undefined. Otherwise, the expression E1 is sequenced before the expression E2. - Add a new paragraph 5.8/4 to section 5.8 -The expression E1 is sequenced before the expression E2. - Add to paragraph 5.18/1 -In all cases, the assignment is sequenced after the value computation of the right and left -operands, and before the value computation of the assignment expression. The right operand -is sequenced before the left operand. -. - Remove footnote 88 from paragraph 5.19/1. -88) However, an invocation of an overloaded comma operator is an ordinary function call; hence, the -evaluations of its argument expressions are unsequenced relative to one another (see 1.9). -6.3. EXPRESSION LIST IN INITIALIZERS (SECTION 8.5) -Add a new paragraph 8.5/19 -If the initializer is a parenthesized expression-list, the expressions are evaluated in the order -specified for function calls (5.2.2). -6.4. OVERLOADED OPERATORS (CLAUSE 13) - Append to paragraph 13.3.1.2/2 -Therefore, the operator notation is first transformed to the equivalent function-call notation as -summarized in Table 10(where @ denotes one of the operators covered in the specified subclause). 
-However, the operands are sequenced in the order prescribed for the built-in operator (Clause 5). -7. IMPLEMENTATION EXPERIENCE REPORT -We modified the Visual C++ compiler to measure the impacts in the worst case scenario. That is - Keep the existing optimizers as they are with no new optimization that exploits the -proposed evaluation rules (so that the optimizers are artificially ‘hampered’) - Forcefully impose a left-to-right evaluation of argument list in function calls (except in the -case where there is a documented existing bug being separately addressed) -We successfully built, installed, and booted the NT kernel. Then we built a large application code base, -and ran “build, validation, test suites.” That uncovered sources of potential bugs due to non-portability -assumptions: one real-world-code test failed, out of 26. Then, we compiled and ran Spec benchmarks. -We found that some entries in the benchmark suite ran slower, others ran faster compared to the scenario -P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -7 -where the evaluation of the argument list is left unspecified. The variation is between -4% and +4%. It is -worth noting that these results are for the worst case scenario where the optimizers have not yet been -updated to be aware of, and take advantage of the new evaluation rules and they are blindly forced to -evaluate function calls from left to right. It is clear that the left-to-right evaluation strategy is triggering -new optimization paths (different inlining decisions and different register allocation) affecting the -variations in the benchmark performance. It appears those opportunities have not traditionally been -exploited, even though permitted under the unspecified order regime. -Based on these experiments, we feel confident recommending the left-to-right evaluation rules for -syntactic function calls and in the functional cast notation involving more than one argument in the -argument list. -8. 
ALTERNATE EVALUATION ORDER FOR FUNCTION CALLS -During the wording review at the Fall 2015 meeting in Kona, HI, some members of CWG expressed a desire -for an alternate evaluation rule for function calls: the expression in the function position is evaluated -before all the arguments and the evaluations of the arguments are indeterminately sequenced, but with -no interleaving. We do not believe that such a nondeterminism brings any substantial added optimization -benefit, but it does perpetuate the confusion and hazards around order of evaluations in function calls. It -perpetuates unnecessary confusion around brace-initialization vs. direct initialization using parenthesis. -Such an ordering would be implemented by the following requirement added to 5.2.2/4: -The value computation and associated side-effect of the postfix-expression are sequenced before -those of the expressions in the expression-list. The initializations of the declared parameters are -indeterminately sequenced with no interleaving. -At the Spring 2016 meeting in Jacksonville, FL, EWG decided (via a poll) not to pursue this alternative -evaluation order. -9. ACKNOWLEDGEMENT -Thanks to Bjarne Stroustrup for discussing this issue with us. We acknowledge the numerous people who -contributed to the discussions on the committee reflectors, as well as in private, including Chris -Hawblitzel, Jim Hogg, Jason Merrill, Gor Nishanov, Andrew Pardoe, Dave Sielaff, and Jim Springfield. CWG -provided useful feedback for presentation and helped with more accurate reflection of the design in the -formal wording. -10. REFERENCES -Gabriel Dos Reis, Herb Sutter and Jonathan Caves Refining Expression Evaluation Order for Idiomatic C++ -[http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4228.pdf]. - 2014. - Doc. N4228. 
-P0145R3 2016-06-23 Reply-To: gdr@microsoft.com -8 \ No newline at end of file diff --git a/context/research/p2300.md b/context/research/p2300.md deleted file mode 100644 index 9751803..0000000 --- a/context/research/p2300.md +++ /dev/null @@ -1,6506 +0,0 @@ -↑ -→ -P2300R10 -std::execution -Published Proposal, 2024-06-28 -Authors: -Michał Dominiak -Georgy Evtushenko -Lewis Baker -Lucian Radu Teodorescu -Lee Howes -Kirk Shoop -Michael Garland -Eric Niebler -Bryce Adelstein Lelbach -Source: -GitHub -Issue Tracking: -GitHub -Project: -ISO/IEC 14882 Programming Languages — C++, ISO/IEC JTC1/SC22/WG21 -Audience: -SG1, LEWG -Table of Contents -1 -Introduction -1.1 -Motivation -1.2 -Priorities -1.3 -Examples: End User -1.3.1 -Hello world -1.3.2 -Asynchronous inclusive scan -1.3.3 -Asynchronous dynamically-sized read -1.4 -Asynchronous Windows socket recv -1.4.1 -More end-user examples -1.4.1.1 -Sudoku solver -1.4.1.2 -File copy -1.4.1.3 -Echo server -1.5 -Examples: Algorithms -1.5.1 -then -1.5.2 -retry -1.6 -Examples: Schedulers -1.6.1 -Inline scheduler -1.6.2 -Single thread scheduler -1.7 -Examples: Server theme -1.7.1 -Composability with execution::let_* -1.7.2 -Moving between execution resources with execution::starts_on and execution::continues_on -1.8 -Design changes from P0443 -1.9 -Prior art -1.9.1 -Futures -1.9.2 -Coroutines -1.9.3 -Callbacks -1.10 -Field experience -1.10.1 -libunifex -1.10.2 -stdexec -1.10.3 -Other implementations -1.10.4 -Inspirations -2 -Revision history -2.1 -R10 -2.2 -R9 -2.3 -R8 -2.4 -R7 -2.5 -R6 -2.5.1 -Environments and attributes -2.6 -R5 -2.7 -R4 -2.7.1 -Dependently-typed senders -2.8 -R3 -2.9 -R2 -2.10 -R1 -2.11 -R0 -3 -Design - introduction -3.1 -Conventions -3.2 -Queries and algorithms -4 -Design - user side -4.1 -Execution resources describe the place of execution -4.2 -Schedulers represent execution resources -4.3 -Senders describe work -4.4 -Senders are composable through sender algorithms -4.5 -Senders can propagate completion 
schedulers -4.5.1 -execution::get_completion_scheduler -4.6 -Execution resource transitions are explicit -4.7 -Senders can be either multi-shot or single-shot -4.8 -Senders are forkable -4.9 -Senders support cancellation -4.9.1 -Cancellation design summary -4.9.2 -Support for cancellation is optional -4.9.3 -Cancellation is inherently racy -4.9.4 -Cancellation design status -4.10 -Sender factories and adaptors are lazy -4.10.1 -Eager execution leads to detached work or worse -4.10.2 -Eager senders complicate algorithm implementations -4.10.3 -Eager senders incur cancellation-related overhead -4.10.4 -Eager senders cannot access execution resource from the receiver -4.11 -Schedulers advertise their forward progress guarantees -4.12 -Most sender adaptors are pipeable -4.13 -A range of senders represents an async sequence of data -4.14 -Senders can represent partial success -4.15 -All awaitables are senders -4.16 -Many senders can be trivially made awaitable -4.17 -Cancellation of a sender can unwind a stack of coroutines -4.18 -Composition with parallel algorithms -4.19 -User-facing sender factories -4.19.1 -execution::schedule -4.19.2 -execution::just -4.19.3 -execution::just_error -4.19.4 -execution::just_stopped -4.19.5 -execution::read_env -4.20 -User-facing sender adaptors -4.20.1 -execution::continues_on -4.20.2 -execution::then -4.20.3 -execution::upon_* -4.20.4 -execution::let_* -4.20.5 -execution::starts_on -4.20.6 -execution::into_variant -4.20.7 -execution::stopped_as_optional -4.20.8 -execution::stopped_as_error -4.20.9 -execution::bulk -4.20.10 -execution::split -4.20.11 -execution::when_all -4.21 -User-facing sender consumers -4.21.1 -this_thread::sync_wait -5 -Design - implementer side -5.1 -Receivers serve as glue between senders -5.2 -Operation states represent work -5.3 -execution::connect -5.4 -Sender algorithms are customizable -5.5 -Sender adaptors are lazy -5.6 -Lazy senders provide optimization opportunities -5.7 -Execution resource transitions 
are two-step -5.8 -All senders are typed -5.9 -Customization points -6 -Specification -14 -Exception handling [except] -14.6 -Special functions [except.special] -14.6.2 -The std::terminate function [except.terminate] -16 -Library introduction [library] -17 -Language support library [cpp] -17.3 -Implementation properties [support.limits] -17.3.2 -Header synopsis [version.syn] -22 -General utilities library [utilities] -22.10 -Function objects [function.objects] -22.10.2 -Header synopsis [functional.syn] -33 -Concurrency support library [thread] -33.3 -Stop tokens [thread.stoptoken] -33.3.1 -Introduction [thread.stoptoken.intro] -33.3.2 -Header synopsis [thread.stoptoken.syn] -33.3.3 -Stop token concepts [stoptoken.concepts] -33.3.4 -Class stop_token [stoptoken] -33.3.4.1 -General [stoptoken.general] -33.3.4.2 -Constructors, copy, and assignment [stoptoken.cons] -33.3.4.3 -Member functions [stoptoken.mem] -33.3.4.4 -Non-member functions [stoptoken.nonmembers] -33.3.5 -Class stop_source [stopsource] -33.3.5.1 -General [stopsource.general] -33.3.5.2 -Constructors, copy, and assignment [stopsource.cons] -33.3.5.3 -Member functions [stopsource.mem] -33.3.5.4 -Non-member functions [stopsource.nonmembers] -33.3.6 -Class template stop_callback [stopcallback] -33.3.6.1 -General [stopcallback.general] -33.3.6.2 -Constructors and destructor [stopcallback.cons] -33.3.7 -Class never_stop_token [stoptoken.never] -33.3.7.1 -General [stoptoken.never.general] -33.3.8 -Class inplace_stop_token [stoptoken.inplace] -33.3.8.1 -General [stoptoken.inplace.general] -33.3.8.2 -Member functions [stoptoken.inplace.members] -33.3.9 -Class inplace_stop_source [stopsource.inplace] -33.3.9.1 -General [stopsource.inplace.general] -33.3.9.2 -Constructors, copy, and assignment [stopsource.inplace.cons] -33.3.9.3 -Members [stopsource.inplace.mem] -33.3.10 -Class template inplace_stop_callback [stopcallback.inplace] -33.3.10.1 -General [stopcallback.inplace.general] -33.3.10.2 -Constructors and 
destructor [stopcallback.inplace.cons] -34 -Execution control library [exec] -34.1 -General [exec.general] -34.2 -Queries and queryables [exec.queryable] -34.2.1 -General [exec.queryable.general] -34.2.2 -queryable concept [exec.queryable.concept] -34.3 -Asynchronous operations [async.ops] -34.4 -Header synopsis [exec.syn] -34.5 -Queries [exec.queries] -34.5.1 -forwarding_query [exec.fwd.env] -34.5.2 -get_allocator [exec.get.allocator] -34.5.3 -get_stop_token [exec.get.stop.token] -34.5.4 -execution::get_env [exec.get.env] -34.5.5 -execution::get_domain [exec.get.domain] -34.5.6 -execution::get_scheduler [exec.get.scheduler] -34.5.7 -execution::get_delegation_scheduler [exec.get.delegation.scheduler] -34.5.8 -execution::get_forward_progress_guarantee [exec.get.forward.progress.guarantee] -34.5.9 -execution::get_completion_scheduler [exec.completion.scheduler] -34.6 -Schedulers [exec.sched] -34.7 -Receivers [exec.recv] -34.7.1 -Receiver concepts [exec.recv.concepts] -34.7.2 -execution::set_value [exec.set.value] -34.7.3 -execution::set_error [exec.set.error] -34.7.4 -execution::set_stopped [exec.set.stopped] -34.8 -Operation states [exec.opstate] -34.8.1 -execution::start [exec.opstate.start] -34.9 -Senders [exec.snd] -34.9.1 -General [exec.snd.general] -34.9.2 -Sender concepts [exec.snd.concepts] -34.9.3 -Awaitable helpers [exec.awaitables] -34.9.4 -execution::default_domain [exec.domain.default] -34.9.4.1 -Static members [exec.domain.default.statics] -34.9.5 -execution::transform_sender [exec.snd.transform] -34.9.6 -execution::transform_env [exec.snd.transform.env] -34.9.7 -execution::apply_sender [exec.snd.apply] -34.9.8 -execution::get_completion_signatures [exec.getcomplsigs] -34.9.9 -execution::connect [exec.connect] -34.9.10 -Sender factories [exec.factories] -34.9.10.1 -execution::schedule [exec.schedule] -34.9.10.2 -execution::just, execution::just_error, execution::just_stopped [exec.just] -34.9.10.3 -execution::read_env [exec.read.env] -34.9.11 -Sender 
adaptors [exec.adapt] -34.9.11.1 -General [exec.adapt.general] -34.9.11.2 -Sender adaptor closure objects [exec.adapt.objects] -34.9.11.3 -execution::starts_on [exec.starts.on] -34.9.11.4 -execution::continues_on [exec.continues.on] -34.9.11.5 -execution::schedule_from [exec.schedule.from] -34.9.11.6 -execution::on [exec.on] -34.9.11.7 -execution::then, execution::upon_error, execution::upon_stopped [exec.then] -34.9.11.8 -execution::let_value, execution::let_error, execution::let_stopped, [exec.let] -34.9.11.9 -execution::bulk [exec.bulk] -34.9.11.10 -execution::split [exec.split] -34.9.11.11 -execution::when_all [exec.when.all] -34.9.11.12 -execution::into_variant [exec.into.variant] -34.9.11.13 -execution::stopped_as_optional [exec.stopped.as.optional] -34.9.11.14 -execution::stopped_as_error [exec.stopped.as.error] -34.9.12 -Sender consumers [exec.consumers] -34.9.12.1 -this_thread::sync_wait [exec.sync.wait] -34.10 -Sender/receiver utilities [exec.utils] -34.10.1 -execution::completion_signatures [exec.utils.cmplsigs] -34.10.2 -execution::transform_completion_signatures [exec.utils.tfxcmplsigs] -34.11 -Execution contexts [exec.ctx] -34.11.1 -execution::run_loop [exec.run.loop] -34.11.1.1 -Associated types [exec.run.loop.types] -34.11.1.2 -Constructor and destructor [exec.run.loop.ctor] -34.11.1.3 -Member functions [exec.run.loop.members] -34.12 -Coroutine utilities [exec.coro.utils] -34.12.1 -execution::as_awaitable [exec.as.awaitable] -34.12.2 -execution::with_awaitable_senders [exec.with.awaitable.senders] -Index -Terms defined by this specification -References -Informative References -1. Introduction -This paper proposes a self-contained design for a Standard C++ framework for managing asynchronous execution on generic execution resources. It is based on the ideas in A Unified Executors Proposal for C++ and its companion papers. - -1.1. 
Motivation -Today, C++ software is increasingly asynchronous and parallel, a trend that is likely to only continue going forward. Asynchrony and parallelism appears everywhere, from processor hardware interfaces, to networking, to file I/O, to GUIs, to accelerators. Every C++ domain and every platform needs to deal with asynchrony and parallelism, from scientific computing to video games to financial services, from the smallest mobile devices to your laptop to GPUs in the world’s fastest supercomputer. - -While the C++ Standard Library has a rich set of concurrency primitives (std::atomic, std::mutex, std::counting_semaphore, etc) and lower level building blocks (std::thread, etc), we lack a Standard vocabulary and framework for asynchrony and parallelism that C++ programmers desperately need. std::async/std::future/std::promise, C++11’s intended exposure for asynchrony, is inefficient, hard to use correctly, and severely lacking in genericity, making it unusable in many contexts. We introduced parallel algorithms to the C++ Standard Library in C++17, and while they are an excellent start, they are all inherently synchronous and not composable. - -This paper proposes a Standard C++ model for asynchrony based around three key abstractions: schedulers, senders, and receivers, and a set of customizable asynchronous algorithms. - -1.2. Priorities -Be composable and generic, allowing users to write code that can be used with many different types of execution resources. - -Encapsulate common asynchronous patterns in customizable and reusable algorithms, so users don’t have to invent things themselves. - -Make it easy to be correct by construction. - -Support the diversity of execution resources and execution agents, because not all execution agents are created equal; some are less capable than others, but not less important. 
- -Allow everything to be customized by an execution resource, including transfer to other execution resources, but don’t require that execution resources customize everything. - -Care about all reasonable use cases, domains and platforms. - -Errors must be propagated, but error handling must not present a burden. - -Support cancellation, which is not an error. - -Have clear and concise answers for where things execute. - -Be able to manage and terminate the lifetimes of objects asynchronously. - -1.3. Examples: End User -In this section we demonstrate the end-user experience of asynchronous programming directly with the sender algorithms presented in this paper. See § 4.19 User-facing sender factories, § 4.20 User-facing sender adaptors, and § 4.21 User-facing sender consumers for short explanations of the algorithms used in these code examples. - -1.3.1. Hello world -using namespace std::execution; - -scheduler auto sch = thread_pool.scheduler(); // 1 - -sender auto begin = schedule(sch); // 2 -sender auto hi = then(begin, []{ // 3 - std::cout << "Hello world! Have an int."; // 3 - return 13; // 3 -}); // 3 -sender auto add_42 = then(hi, [](int arg) { return arg + 42; }); // 4 - -auto [i] = this_thread::sync_wait(add_42).value(); // 5 -This example demonstrates the basics of schedulers, senders, and receivers: - -First we need to get a scheduler from somewhere, such as a thread pool. A scheduler is a lightweight handle to an execution resource. - -To start a chain of work on a scheduler, we call § 4.19.1 execution::schedule, which returns a sender that completes on the scheduler. A sender describes asynchronous work and sends a signal (value, error, or stopped) to some recipient(s) when that work completes. - -We use sender algorithms to produce senders and compose asynchronous work. § 4.20.2 execution::then is a sender adaptor that takes an input sender and a std::invocable, and calls the std::invocable on the signal sent by the input sender. 
The sender returned by then sends the result of that invocation. In this case, the input sender came from schedule, so its void, meaning it won’t send us a value, so our std::invocable takes no parameters. But we return an int, which will be sent to the next recipient. - -Now, we add another operation to the chain, again using § 4.20.2 execution::then. This time, we get sent a value - the int from the previous step. We add 42 to it, and then return the result. - -Finally, we’re ready to submit the entire asynchronous pipeline and wait for its completion. Everything up until this point has been completely asynchronous; the work may not have even started yet. To ensure the work has started and then block pending its completion, we use § 4.21.1 this_thread::sync_wait, which will either return a std::optional> with the value sent by the last sender, or an empty std::optional if the last sender sent a stopped signal, or it throws an exception if the last sender sent an error. - -1.3.2. Asynchronous inclusive scan -using namespace std::execution; - -sender auto async_inclusive_scan(scheduler auto sch, // 2 - std::span input, // 1 - std::span output, // 1 - double init, // 1 - std::size_t tile_count) // 3 -{ - std::size_t const tile_size = (input.size() + tile_count - 1) / tile_count; - - std::vector partials(tile_count + 1); // 4 - partials[0] = init; // 4 - - return just(std::move(partials)) // 5 - | continues_on(sch) - | bulk(tile_count, // 6 - [ = ](std::size_t i, std::vector& partials) { // 7 - auto start = i * tile_size; // 8 - auto end = std::min(input.size(), (i + 1) * tile_size); // 8 - partials[i + 1] = *--std::inclusive_scan(begin(input) + start, // 9 - begin(input) + end, // 9 - begin(output) + start); // 9 - }) // 10 - | then( // 11 - [](std::vector&& partials) { - std::inclusive_scan(begin(partials), end(partials), // 12 - begin(partials)); // 12 - return std::move(partials); // 13 - }) - | bulk(tile_count, // 14 - [ = ](std::size_t i, std::vector& partials) 
{ // 14 - auto start = i * tile_size; // 14 - auto end = std::min(input.size(), (i + 1) * tile_size); // 14 - std::for_each(begin(output) + start, begin(output) + end, // 14 - [&] (double& e) { e = partials[i] + e; } // 14 - ); - }) - | then( // 15 - [ = ](std::vector&& partials) { // 15 - return output; // 15 - }); // 15 -} -This example builds an asynchronous computation of an inclusive scan: - -It scans a sequence of doubles (represented as the std::span input) and stores the result in another sequence of doubles (represented as std::span output). - -It takes a scheduler, which specifies what execution resource the scan should be launched on. - -It also takes a tile_count parameter that controls the number of execution agents that will be spawned. - -First we need to allocate temporary storage needed for the algorithm, which we’ll do with a std::vector, partials. We need one double of temporary storage for each execution agent we create. - -Next we’ll create our initial sender with § 4.19.2 execution::just and § 4.20.1 execution::continues_on. These senders will send the temporary storage, which we’ve moved into the sender. The sender has a completion scheduler of sch, which means the next item in the chain will use sch. - -Senders and sender adaptors support composition via operator|, similar to C++ ranges. We’ll use operator| to attach the next piece of work, which will spawn tile_count execution agents using § 4.20.9 execution::bulk (see § 4.12 Most sender adaptors are pipeable for details). - -Each agent will call a std::invocable, passing it two arguments. The first is the agent’s index (i) in the § 4.20.9 execution::bulk operation, in this case a unique integer in [0, tile_count). The second argument is what the input sender sent - the temporary storage. - -We start by computing the start and end of the range of input and output elements that this agent is responsible for, based on our agent index. 
- -Then we do a sequential std::inclusive_scan over our elements. We store the scan result for our last element, which is the sum of all of our elements, in our temporary storage partials. - -After all computation in that initial § 4.20.9 execution::bulk pass has completed, every one of the spawned execution agents will have written the sum of its elements into its slot in partials. - -Now we need to scan all of the values in partials. We’ll do that with a single execution agent which will execute after the § 4.20.9 execution::bulk completes. We create that execution agent with § 4.20.2 execution::then. - -§ 4.20.2 execution::then takes an input sender and an std::invocable and calls the std::invocable with the value sent by the input sender. Inside our std::invocable, we call std::inclusive_scan on partials, which the input senders will send to us. - -Then we return partials, which the next phase will need. - -Finally we do another § 4.20.9 execution::bulk of the same shape as before. In this § 4.20.9 execution::bulk, we will use the scanned values in partials to integrate the sums from other tiles into our elements, completing the inclusive scan. - -async_inclusive_scan returns a sender that sends the output std::span. A consumer of the algorithm can chain additional work that uses the scan result. At the point at which async_inclusive_scan returns, the computation may not have completed. In fact, it may not have even started. - -1.3.3. 
Asynchronous dynamically-sized read -using namespace std::execution; - -sender_of auto async_read( // 1 - sender_of> auto buffer, // 1 - auto handle); // 1 - -struct dynamic_buffer { // 3 - std::unique_ptr data; // 3 - std::size_t size; // 3 -}; // 3 - -sender_of auto async_read_array(auto handle) { // 2 - return just(dynamic_buffer{}) // 4 - | let_value([handle] (dynamic_buffer& buf) { // 5 - return just(std::as_writeable_bytes(std::span(&buf.size, 1))) // 6 - | async_read(handle) // 7 - | then( // 8 - [&buf] (std::size_t bytes_read) { // 9 - assert(bytes_read == sizeof(buf.size)); // 10 - buf.data = std::make_unique(buf.size); // 11 - return std::span(buf.data.get(), buf.size); // 12 - }) - | async_read(handle) // 13 - | then( - [&buf] (std::size_t bytes_read) { - assert(bytes_read == buf.size); // 14 - return std::move(buf); // 15 - }); - }); -} -This example demonstrates a common asynchronous I/O pattern - reading a payload of a dynamic size by first reading the size, then reading the number of bytes specified by the size: - -async_read is a pipeable sender adaptor. It’s a customization point object, but this is what it’s call signature looks like. It takes a sender parameter which must send an input buffer in the form of a std::span, and a handle to an I/O context. It will asynchronously read into the input buffer, up to the size of the std::span. It returns a sender which will send the number of bytes read once the read completes. - -async_read_array takes an I/O handle and reads a size from it, and then a buffer of that many bytes. It returns a sender that sends a dynamic_buffer object that owns the data that was sent. - -dynamic_buffer is an aggregate struct that contains a std::unique_ptr and a size. - -The first thing we do inside of async_read_array is create a sender that will send a new, empty dynamic_array object using § 4.19.2 execution::just. 
We can attach more work to the pipeline using operator| composition (see § 4.12 Most sender adaptors are pipeable for details). - -We need the lifetime of this dynamic_array object to last for the entire pipeline. So, we use let_value, which takes an input sender and a std::invocable that must return a sender itself (see § 4.20.4 execution::let_* for details). let_value sends the value from the input sender to the std::invocable. Critically, the lifetime of the sent object will last until the sender returned by the std::invocable completes. - -Inside of the let_value std::invocable, we have the rest of our logic. First, we want to initiate an async_read of the buffer size. To do that, we need to send a std::span pointing to buf.size. We can do that with § 4.19.2 execution::just. - -We chain the async_read onto the § 4.19.2 execution::just sender with operator|. - -Next, we pipe a std::invocable that will be invoked after the async_read completes using § 4.20.2 execution::then. - -That std::invocable gets sent the number of bytes read. - -We need to check that the number of bytes read is what we expected. - -Now that we have read the size of the data, we can allocate storage for it. - -We return a std::span to the storage for the data from the std::invocable. This will be sent to the next recipient in the pipeline. - -And that recipient will be another async_read, which will read the data. - -Once the data has been read, in another § 4.20.2 execution::then, we confirm that we read the right number of bytes. - -Finally, we move out of and return our dynamic_buffer object. It will get sent by the sender returned by async_read_array. We can attach more things to that sender to use the data in the buffer. - -1.4. Asynchronous Windows socket recv -To get a better feel for how this interface might be used by low-level operations see this example implementation of a cancellable async_recv() operation for a Windows Socket. 
- -struct operation_base : WSAOVERALAPPED { - using completion_fn = void(operation_base* op, DWORD bytesTransferred, int errorCode) noexcept; - - // Assume IOCP event loop will call this when this OVERLAPPED structure is dequeued. - completion_fn* completed; -}; - -template -struct recv_op : operation_base { - using operation_state_concept = std::execution::operation_state_t; - - recv_op(SOCKET s, void* data, size_t len, Receiver r) - : receiver(std::move(r)) - , sock(s) { - this->Internal = 0; - this->InternalHigh = 0; - this->Offset = 0; - this->OffsetHigh = 0; - this->hEvent = NULL; - this->completed = &recv_op::on_complete; - buffer.len = len; - buffer.buf = static_cast(data); - } - - void start() & noexcept { - // Avoid even calling WSARecv() if operation already cancelled - auto st = std::execution::get_stop_token( - std::execution::get_env(receiver)); - if (st.stop_requested()) { - std::execution::set_stopped(std::move(receiver)); - return; - } - - // Store and cache result here in case it changes during execution - const bool stopPossible = st.stop_possible(); - if (!stopPossible) { - ready.store(true, std::memory_order_relaxed); - } - - // Launch the operation - DWORD bytesTransferred = 0; - DWORD flags = 0; - int result = WSARecv(sock, &buffer, 1, &bytesTransferred, &flags, - static_cast(this), NULL); - if (result == SOCKET_ERROR) { - int errorCode = WSAGetLastError(); - if (errorCode != WSA_IO_PENDING) { - if (errorCode == WSA_OPERATION_ABORTED) { - std::execution::set_stopped(std::move(receiver)); - } else { - std::execution::set_error(std::move(receiver), - std::error_code(errorCode, std::system_category())); - } - return; - } - } else { - // Completed synchronously (assuming FILE_SKIP_COMPLETION_PORT_ON_SUCCESS has been set) - execution::set_value(std::move(receiver), bytesTransferred); - return; - } - - // If we get here then operation has launched successfully and will complete asynchronously. 
- // May be completing concurrently on another thread already. - if (stopPossible) { - // Register the stop callback - stopCallback.emplace(std::move(st), cancel_cb{*this}); - - // Mark as 'completed' - if (ready.load(std::memory_order_acquire) || - ready.exchange(true, std::memory_order_acq_rel)) { - // Already completed on another thread - stopCallback.reset(); - - BOOL ok = WSAGetOverlappedResult(sock, (WSAOVERLAPPED*)this, &bytesTransferred, FALSE, &flags); - if (ok) { - std::execution::set_value(std::move(receiver), bytesTransferred); - } else { - int errorCode = WSAGetLastError(); - std::execution::set_error(std::move(receiver), - std::error_code(errorCode, std::system_category())); - } - } - } - } - - struct cancel_cb { - recv_op& op; - - void operator()() noexcept { - CancelIoEx((HANDLE)op.sock, (OVERLAPPED*)(WSAOVERLAPPED*)&op); - } - }; - - static void on_complete(operation_base* op, DWORD bytesTransferred, int errorCode) noexcept { - recv_op& self = *static_cast(op); - - if (self.ready.load(std::memory_order_acquire) || - self.ready.exchange(true, std::memory_order_acq_rel)) { - // Unsubscribe any stop callback so we know that CancelIoEx() is not accessing 'op' - // any more - self.stopCallback.reset(); - - if (errorCode == 0) { - std::execution::set_value(std::move(self.receiver), bytesTransferred); - } else { - std::execution::set_error(std::move(self.receiver), - std::error_code(errorCode, std::system_category())); - } - } - } - - using stop_callback_t = stop_callback_of_t>, cancel_cb>; - - Receiver receiver; - SOCKET sock; - WSABUF buffer; - std::optional stopCallback; - std::atomic ready{false}; -}; - -struct recv_sender { - using sender_concept = std::execution::sender_t; - SOCKET sock; - void* data; - size_t len; - - template - recv_op connect(Receiver r) const { - return recv_op{sock, data, len, std::move(r)}; - } -}; - -recv_sender async_recv(SOCKET s, void* data, size_t len) { - return recv_sender{s, data, len}; -} -1.4.1. 
More end-user examples
1.4.1.1. Sudoku solver
This example comes from Kirk Shoop, who ported an example from TBB’s documentation to sender/receiver in his fork of the libunifex repo. It is a Sudoku solver that uses a configurable number of threads to explore the search space for solutions.

The sender/receiver-based Sudoku solver can be found here. Some things that are worth noting about Kirk’s solution:

Although it schedules asynchronous work onto a thread pool, and each unit of work will schedule more work, its use of structured concurrency patterns makes reference counting unnecessary. The solution does not make use of shared_ptr.

In addition to eliminating the need for reference counting, the use of structured concurrency makes it easy to ensure that resources are cleaned up on all code paths. In contrast, the TBB example that inspired this one leaks memory.

For comparison, the TBB-based Sudoku solver can be found here.

1.4.1.2. File copy
This example also comes from Kirk Shoop, who uses sender/receiver to recursively copy the files of a directory tree. It demonstrates how sender/receiver can be used to do IO, using a scheduler that schedules work on Linux’s io_uring.

As with the Sudoku example, this example obviates the need for reference counting by employing structured concurrency. It uses iteration with an upper limit to avoid having too many open file handles.

You can find the example here.

1.4.1.3. Echo server
Dietmar Kuehl has proposed networking APIs that use the sender/receiver abstraction (see P2762). He has implemented an echo server as a demo. His echo server code can be found here.

Below, I show part of the echo server code. This code is executed for each client that connects to the echo server. In a loop, it reads input from a socket and echoes the input back to the same socket. All of this, including the loop, is implemented with generic async algorithms.
- -outstanding.start( - EX::repeat_effect_until( - EX::let_value( - NN::async_read_some(ptr->d_socket, - context.scheduler(), - NN::buffer(ptr->d_buffer)) - | EX::then([ptr](::std::size_t n){ - ::std::cout << "read='" << ::std::string_view(ptr->d_buffer, n) << "'\n"; - ptr->d_done = n == 0; - return n; - }), - [&context, ptr](::std::size_t n){ - return NN::async_write_some(ptr->d_socket, - context.scheduler(), - NN::buffer(ptr->d_buffer, n)); - }) - | EX::then([](auto&&...){}) - , [owner = ::std::move(owner)]{ return owner->d_done; } - ) -); -In this code, NN::async_read_some and NN::async_write_some are asynchronous socket-based networking APIs that return senders. EX::repeat_effect_until, EX::let_value, and EX::then are fully generic sender adaptor algorithms that accept and return senders. - -This is a good example of seamless composition of async IO functions with non-IO operations. And by composing the senders in this structured way, all the state for the composite operation -- the repeat_effect_until expression and all its child operations -- is stored altogether in a single object. - -1.5. Examples: Algorithms -In this section we show a few simple sender/receiver-based algorithm implementations. - -1.5.1. then -namespace stdexec = std::execution; - -template -class _then_receiver : public R { - F f_; - - public: - _then_receiver(R r, F f) : R(std::move(r)), f_(std::move(f)) {} - - // Customize set_value by invoking the callable and passing the result to - // the inner receiver - template - requires std::invocable - void set_value(As&&... as) && noexcept { - try { - stdexec::set_value(std::move(*this).base(), std::invoke((F&&) f_, (As&&) as...)); - } catch(...) 
{ - stdexec::set_error(std::move(*this).base(), std::current_exception()); - } - } -}; - -template -struct _then_sender { - using sender_concept = stdexec::sender_t; - S s_; - F f_; - - template - using _set_value_t = stdexec::completion_signatures< - stdexec::set_value_t(std::invoke_result_t)>; - - using _except_ptr_sig = - stdexec::completion_signatures; - - // Compute the completion signatures - template - auto get_completion_signatures(Env&& env) && noexcept - -> stdexec::transform_completion_signatures_of< - S, Env, _except_ptr_sig, _set_value_t> { - return {}; - } - - // Connect: - template - auto connect(R r) && -> stdexec::connect_result_t> { - return stdexec::connect( - (S&&) s_, _then_receiver{(R&&) r, (F&&) f_}); - } - - decltype(auto) get_env() const noexcept { - return get_env(s_); - } -}; - -template -stdexec::sender auto then(S s, F f) { - return _then_sender{(S&&) s, (F&&) f}; -} -This code builds a then algorithm that transforms the value(s) from the input sender with a transformation function. The result of the transformation becomes the new value. The other receiver functions (set_error and set_stopped), as well as all receiver queries, are passed through unchanged. - -In detail, it does the following: - -Defines a receiver in terms of receiver and an invocable that: - -Defines a constrained set_value member function for transforming the value channel. - -Delegates set_error and set_stopped to the inner receiver. - -Defines a sender that aggregates another sender and the invocable, which defines a connect member function that wraps the incoming receiver in the receiver from (1) and passes it and the incoming sender to std::execution::connect, returning the result. It also defines a get_completion_signatures member function that declares the sender’s completion signatures when executed within a particular environment. - -1.5.2. 
retry -using namespace std; -namespace stdexec = execution; - -template -concept _decays_to = same_as, To>; - -// _conv needed so we can emplace construct non-movable types into -// a std::optional. -template -struct _conv { - F f_; - - static_assert(is_nothrow_move_constructible_v); - explicit _conv(F f) noexcept : f_((F&&) f) {} - - operator invoke_result_t() && { - return ((F&&) f_)(); - } -}; - -template -struct _retry_op; - -// pass through all customizations except set_error, which retries -// the operation. -template -struct _retry_receiver { - _retry_op* o_; - - void set_value(auto&&... as) && noexcept { - stdexec::set_value(std::move(o_->r_), (decltype(as)&&) as...); - } - - void set_error(auto&&) && noexcept { - o_->_retry(); // This causes the op to be retried - } - - void set_stopped() && noexcept { - stdexec::set_stopped(std::move(o_->r_)); - } - - decltype(auto) get_env() const noexcept { - return get_env(o_->r_); - } -}; - -// Hold the nested operation state in an optional so we can -// re-construct and re-start it if the operation fails. -template -struct _retry_op { - using operation_state_concept = stdexec::operation_state_t; - using _child_op_t = - stdexec::connect_result_t>; - - S s_; - R r_; - optional<_child_op_t> o_; - - _op(_op&&) = delete; - _op(S s, R r) - : s_(std::move(s)), r_(std::move(r)), o_{_connect()} {} - - auto _connect() noexcept { - return _conv{[this] { - return stdexec::connect(s_, _retry_receiver{this}); - }}; - } - - void _retry() noexcept { - try { - o_.emplace(_connect()); // potentially-throwing - stdexec::start(*o_); - } catch(...) 
{ - stdexec::set_error(std::move(r_), std::current_exception()); - } - } - - void start() & noexcept { - stdexec::start(*o_); - } -}; - -// Helpers for computing the then sender’s completion signatures: -template - using _value_t = - stdexec::completion_signatures; - -template - using _error_t = stdexec::completion_signatures<>; - -using _except_sig = - stdexec::completion_signatures; - -template -struct _retry_sender { - using sender_concept = stdexec::sender_t; - S s_; - explicit _retry_sender(S s) : s_(std::move(s)) {} - - // Declare the signatures with which this sender can complete - template - using _compl_sigs = - stdexec::transform_completion_signatures_of< - S&, Env, _except_sig, _value_t, _error_t>; - - template - auto get_completion_signatures(Env&&) const noexcept -> _compl_sigs { - return {}; - } - - template - requires stdexec::sender_to> - _retry_op connect(R r) && { - return {std::move(s_), std::move(r)}; - } - - decltype(auto) get_env() const noexcept { - return get_env(s_); - } -}; - -template -stdexec::sender auto retry(S s) { - return _retry_sender{std::move(s)}; -} -The retry algorithm takes a multi-shot sender and causes it to repeat on error, passing through values and stopped signals. Each time the input sender is restarted, a new receiver is connected and the resulting operation state is stored in an optional, which allows us to reinitialize it multiple times. - -This example does the following: - -Defines a _conv utility that takes advantage of C++17’s guaranteed copy elision to emplace a non-movable type in a std::optional. - -Defines a _retry_receiver that holds a pointer back to the operation state. It passes all customizations through unmodified to the inner receiver owned by the operation state except for set_error, which causes a _retry() function to be called instead. - -Defines an operation state that aggregates the input sender and receiver, and declares storage for the nested operation state in an optional. 
Constructing the operation state constructs a _retry_receiver with a pointer to the (under construction) operation state and uses it to connect to the input sender. - -Starting the operation state dispatches to start on the inner operation state. - -The _retry() function reinitializes the inner operation state by connecting the sender to a new receiver, holding a pointer back to the outer operation state as before. - -After reinitializing the inner operation state, _retry() calls start on it, causing the failed operation to be rescheduled. - -Defines a _retry_sender that implements a connect member function to return an operation state constructed from the passed-in sender and receiver. - -_retry_sender also implements a get_completion_signatures member function to describe the ways this sender may complete when executed in a particular execution resource. - -1.6. Examples: Schedulers -In this section we look at some schedulers of varying complexity. - -1.6.1. Inline scheduler -namespace stdexec = std::execution; - -class inline_scheduler { - template - struct _op { - using operation_state_concept = operation_state_t; - R rec_; - - void start() & noexcept { - stdexec::set_value(std::move(rec_)); - } - }; - - struct _env { - template - inline_scheduler query(stdexec::get_completion_scheduler_t) const noexcept { - return {}; - } - }; - - struct _sender { - using sender_concept = stdexec::sender_t; - using _compl_sigs = stdexec::completion_signatures; - using completion_signatures = _compl_sigs; - - template R> - _op connect(R rec) noexcept(std::is_nothrow_move_constructible_v) { - return {std::move(rec)}; - } - - _env get_env() const noexcept { - return {}; - } - }; - - public: - inline_scheduler() = default; - - _sender schedule() const noexcept { - return {}; - } - - bool operator==(const inline_scheduler&) const noexcept = default; -}; -The inline scheduler is a trivial scheduler that completes immediately and synchronously on the thread that calls 
std::execution::start on the operation state produced by its sender. In other words, start(connect(schedule(inline_scheduler()), receiver)) is just a fancy way of saying set_value(receiver), with the exception of the fact that start wants to be passed an lvalue. - -Although not a particularly useful scheduler, it serves to illustrate the basics of implementing one. The inline_scheduler: - -Customizes execution::schedule to return an instance of the sender type _sender. - -The _sender type models the sender concept and provides the metadata needed to describe it as a sender of no values and that never calls set_error or set_stopped. This metadata is provided with the help of the execution::completion_signatures utility. - -The _sender type customizes execution::connect to accept a receiver of no values. It returns an instance of type _op that holds the receiver by value. - -The operation state customizes std::execution::start to call std::execution::set_value on the receiver. - -1.6.2. Single thread scheduler -This example shows how to create a scheduler for an execution resource that consists of a single thread. It is implemented in terms of a lower-level execution resource called std::execution::run_loop. - -class single_thread_context { - std::execution::run_loop loop_; - std::thread thread_; - -public: - single_thread_context() - : loop_() - , thread_([this] { loop_.run(); }) - {} - single_thread_context(single_thread_context&&) = delete; - - ~single_thread_context() { - loop_.finish(); - thread_.join(); - } - - auto get_scheduler() noexcept { - return loop_.get_scheduler(); - } - - std::thread::id get_thread_id() const noexcept { - return thread_.get_id(); - } -}; -The single_thread_context owns an event loop and a thread to drive it. In the destructor, it tells the event loop to finish up what it’s doing and then joins the thread, blocking for the event loop to drain. - -The interesting bits are in the execution::run_loop context implementation. 
It is slightly too long to include here, so we only provide a reference to it, but there is one noteworthy detail about its implementation: It uses space in its operation states to build an intrusive linked list of work items. In structured concurrency patterns, the operation states of nested operations compose statically, and in an algorithm like this_thread::sync_wait, the composite operation state lives on the stack for the duration of the operation. The end result is that work can be scheduled onto this thread with zero allocations. - -1.7. Examples: Server theme -In this section we look at some examples of how one would use senders to implement an HTTP server. The examples ignore the low-level details of the HTTP server and looks at how senders can be combined to achieve the goals of the project. - -General application context: - -server application that processes images - -execution resources: - -1 dedicated thread for network I/O - -N worker threads used for CPU-intensive work - -M threads for auxiliary I/O - -optional GPU context that may be used on some types of servers - -all parts of the applications can be asynchronous - -no locks shall be used in user code - -1.7.1. Composability with execution::let_* -Example context: - -we are looking at the flow of processing an HTTP request and sending back the response. - -show how one can break the (slightly complex) flow into steps with execution::let_* functions. - -different phases of processing HTTP requests are broken down into separate concerns. - -each part of the processing might use different execution resources (details not shown in this example). - -error handling is generic, regardless which component fails; we always send the right response to the clients. - -Goals: - -show how one can break more complex flows into steps with let_* functions. - -exemplify the use of let_value, let_error, let_stopped, and just algorithms. 
- -namespace stdexec = std::execution; - -// Returns a sender that yields an http_request object for an incoming request -stdexec::sender auto schedule_request_start(read_requests_ctx ctx) {...} - -// Sends a response back to the client; yields a void signal on success -stdexec::sender auto send_response(const http_response& resp) {...} - -// Validate that the HTTP request is well-formed; forwards the request on success -stdexec::sender auto validate_request(const http_request& req) {...} - -// Handle the request; main application logic -stdexec::sender auto handle_request(const http_request& req) { - //... - return stdexec::just(http_response{200, result_body}); -} - -// Transforms server errors into responses to be sent to the client -stdexec::sender auto error_to_response(std::exception_ptr err) { - try { - std::rethrow_exception(err); - } catch (const std::invalid_argument& e) { - return stdexec::just(http_response{404, e.what()}); - } catch (const std::exception& e) { - return stdexec::just(http_response{500, e.what()}); - } catch (...) { - return stdexec::just(http_response{500, "Unknown server error"}); - } -} - -// Transforms cancellation of the server into responses to be sent to the client -stdexec::sender auto stopped_to_response() { - return stdexec::just(http_response{503, "Service temporarily unavailable"}); -} - -//... 
- -// The whole flow for transforming incoming requests into responses -stdexec::sender auto snd = - // get a sender when a new request comes - schedule_request_start(the_read_requests_ctx) - // make sure the request is valid; throw if not - | stdexec::let_value(validate_request) - // process the request in a function that may be using a different execution resource - | stdexec::let_value(handle_request) - // If there are errors transform them into proper responses - | stdexec::let_error(error_to_response) - // If the flow is cancelled, send back a proper response - | stdexec::let_stopped(stopped_to_response) - // write the result back to the client - | stdexec::let_value(send_response) - // done - ; - -// execute the whole flow asynchronously -stdexec::start_detached(std::move(snd)); -The example shows how one can separate out the concerns for interpreting requests, validating requests, running the main logic for handling the request, generating error responses, handling cancellation and sending the response back to the client. They are all different phases in the application, and can be joined together with the let_* functions. - -All our functions return execution::sender objects, so that they can all generate success, failure and cancellation paths. For example, regardless where an error is generated (reading request, validating request or handling the response), we would have one common block to handle the error, and following error flows is easy. - -Also, because of using execution::sender objects at any step, we might expect any of these steps to be completely asynchronous; the overall flow doesn’t care. Regardless of the execution resource in which the steps, or part of the steps are executed in, the flow is still the same. - -1.7.2. 
Moving between execution resources with execution::starts_on and execution::continues_on
Example context:

reading data from the socket before processing the request

reading of the data is done on the I/O context

no processing of the data needs to be done on the I/O context

Goals:

show how one can change the execution resource

exemplify the use of starts_on and continues_on algorithms

namespace stdexec = std::execution;

size_t legacy_read_from_socket(int sock, char* buffer, size_t buffer_len);
void process_read_data(const char* read_data, size_t read_len);
//...

// A sender that just calls the legacy read function
auto snd_read = stdexec::just(sock, buf, buf_len)
| stdexec::then(legacy_read_from_socket);

// The entire flow
auto snd =
// start by reading data on the I/O thread
stdexec::starts_on(io_sched, std::move(snd_read))
// do the processing on the worker threads pool
| stdexec::continues_on(work_sched)
// process the incoming data (on worker threads)
| stdexec::then([buf](int read_len) { process_read_data(buf, read_len); })
// done
;

// execute the whole flow asynchronously
stdexec::start_detached(std::move(snd));
The example assumes that we need to wrap some legacy code of reading sockets, and handle execution resource switching. (This style of reading from socket may not be the most efficient one, but it’s working for our purposes.) For performance reasons, the reading from the socket needs to be done on the I/O thread, and all the processing needs to happen on a work-specific execution resource (i.e., thread pool).

Calling execution::starts_on will ensure that the given sender will be started on the given scheduler. In our example, snd_read is going to be started on the I/O scheduler. This sender will just call the legacy code.

The completion-signal will be issued in the I/O execution resource, so we have to move it to the work thread pool.
This is achieved with the help of the execution::continues_on algorithm. The rest of the processing (in our case, the last call to then) will happen in the work thread pool.

The reader should notice the difference between execution::starts_on and execution::continues_on. The execution::starts_on algorithm will ensure that the given sender will start in the specified context, and doesn’t care where the completion-signal for that sender is sent. The execution::continues_on algorithm will not care where the given sender is going to be started, but will ensure that its completion-signal will be transferred to the given context.

1.8. Design changes from P0443
The executor concept has been removed and all of its proposed functionality is now based on schedulers and senders, as per SG1 direction.

Properties are not included in this paper. We see them as a possible future extension, if the committee gets more comfortable with them.

Senders now advertise what scheduler, if any, their evaluation will complete on.

The places of execution of user code in P0443 weren’t precisely defined, whereas they are in this paper. See § 4.5 Senders can propagate completion schedulers.

P0443 did not propose a suite of sender algorithms necessary for writing sender code; this paper does. See § 4.19 User-facing sender factories, § 4.20 User-facing sender adaptors, and § 4.21 User-facing sender consumers.

P0443 did not specify the semantics of variously qualified connect overloads; this paper does. See § 4.7 Senders can be either multi-shot or single-shot.

This paper extends the sender traits/typed sender design to support typed senders whose value/error types depend on type information provided late via the receiver.

Support for untyped senders is dropped; the typed_sender concept is renamed sender; sender_traits is replaced with completion_signatures_of_t.

Specific type erasure facilities are omitted, as per LEWG direction.
Type erasure facilities can be built on top of this proposal, as discussed in § 5.9 Customization points.

A specific thread pool implementation is omitted, as per LEWG direction.

Some additional utilities are added:

run_loop: An execution resource that provides a multi-producer, single-consumer, first-in-first-out work queue.

completion_signatures and transform_completion_signatures: Utilities for describing the ways in which a sender can complete in a declarative syntax.

1.9. Prior art
This proposal builds upon and learns from years of prior art with asynchronous and parallel programming frameworks in C++. In this section, we discuss async abstractions that have previously been suggested as a possible basis for asynchronous algorithms and why they fall short.

1.9.1. Futures
A future is a handle to work that has already been scheduled for execution. It is one end of a communication channel; the other end is a promise, used to receive the result from the concurrent operation and to communicate it to the future.

Futures, as traditionally realized, require the dynamic allocation and management of a shared state, synchronization, and typically type-erasure of work and continuation. Many of these costs are inherent in the nature of "future" as a handle to work that is already scheduled for execution. These expenses rule out the future abstraction for many uses and make it a poor choice for a basis of a generic mechanism.

1.9.2. Coroutines
C++20 coroutines are frequently suggested as a basis for asynchronous algorithms. It’s fair to ask why, if we added coroutines to C++, are we suggesting the addition of a library-based abstraction for asynchrony. Certainly, coroutines come with huge syntactic and semantic advantages over the alternatives.

Although coroutines are lighter weight than futures, coroutines suffer many of the same problems. Since they typically start suspended, they can avoid synchronizing the chaining of dependent work.
However in many cases, coroutine frames require an unavoidable dynamic allocation and indirect function calls. This is done to hide the layout of the coroutine frame from the C++ type system, which in turn makes possible the separate compilation of coroutines and certain compiler optimizations, such as optimization of the coroutine frame size.

Those advantages come at a cost, though. Because of the dynamic allocation of coroutine frames, coroutines in embedded or heterogeneous environments, which often lack support for dynamic allocation, require great attention to detail. And the allocations and indirections tend to complicate the job of the inliner, often resulting in sub-optimal codegen.

The coroutine language feature mitigates these shortcomings somewhat with the HALO optimization ("Halo: coroutine Heap Allocation eLision Optimization: the joint response"), which leverages existing compiler optimizations such as allocation elision and devirtualization to inline the coroutine, completely eliminating the runtime overhead. However, HALO requires a sophisticated compiler, and a fair number of stars need to align for the optimization to kick in. In our experience, more often than not in real-world code today’s compilers are not able to inline the coroutine, resulting in allocations and indirections in the generated code.

In a suite of generic async algorithms that are expected to be callable from hot code paths, the extra allocations and indirections are a deal-breaker. It is for these reasons that we consider coroutines a poor choice for a basis of all standard async.

1.9.3. Callbacks
Callbacks are the oldest, simplest, most powerful, and most efficient mechanism for creating chains of work, but suffer problems of their own. Callbacks must propagate either errors or values. This simple requirement yields many different interface possibilities. The lack of a standard callback shape obstructs generic design.
- -Additionally, few of these possibilities accommodate cancellation signals when the user requests upstream work to stop and clean up. - -1.10. Field experience -1.10.1. libunifex -This proposal draws heavily from our field experience with libunifex. Libunifex implements all of the concepts and customization points defined in this paper (with slight variations -- the design of P2300 has evolved due to LEWG feedback), many of this paper’s algorithms (some under different names), and much more besides. - -Libunifex has several concrete schedulers in addition to the run_loop suggested here (where it is called manual_event_loop). It has schedulers that dispatch efficiently to epoll and io_uring on Linux and the Windows Thread Pool on Windows. - -In addition to the proposed interfaces and the additional schedulers, it has several important extensions to the facilities described in this paper, which demonstrate directions in which these abstractions may be evolved over time, including: - -Timed schedulers, which permit scheduling work on an execution resource at a particular time or after a particular duration has elapsed. In addition, it provides time-based algorithms. - -File I/O schedulers, which permit filesystem I/O to be scheduled. - -Two complementary abstractions for streams (asynchronous ranges), and a set of stream-based algorithms. - -Libunifex has seen heavy production use at Meta. An employee summarizes it as follows: - -As of June, 2023, Unifex is still used in production at Meta. It’s used to express the asynchrony in rsys, and is therefore serving video calling to billions of people every month on Meta’s social networking apps on iOS, Android, Windows, and macOS. It’s also serving the Virtual Desktop experience on Oculus Quest devices, and some internal uses that run on Linux. - -One team at Meta has migrated from folly::Future to unifex::task and seen significant developer efficiency improvements. 
Coroutines are easier to understand than chained futures so the team was able to meet requirements for certain constrained environments that would have been too complicated to maintain with futures.

In all the cases mentioned above, developers mix-and-match between the sender algorithms in Unifex and Unifex’s coroutine type, unifex::task. We also rely on unifex::task's scheduler affinity to minimize surprise when programming with coroutines.

1.10.2. stdexec
stdexec is the reference implementation of this proposal. It is a complete implementation, written from the specification in this paper, and is current with R8.

The original purpose of stdexec was to help find specification bugs and to harden the wording of the proposal, but it has since become one of NVIDIA’s core C++ libraries for high-performance computing. In addition to the facilities proposed in this paper, stdexec has schedulers for CUDA, Intel TBB, and MacOS. Like libunifex, its scope has also expanded to include a streaming abstraction and stream algorithms, and time-based schedulers and algorithms.

The stdexec project has seen lots of community interest and contributions. At the time of writing (March, 2024), the GitHub repository has 1.2k stars, 130 forks, and 50 contributors.

stdexec is fit for broad use and for ultimate contribution to libc++.

1.10.3. Other implementations
The authors are aware of a number of other implementations of sender/receiver from this paper. These are presented here in perceived order of maturity and field experience.

HPX - The C++ Standard Library for Parallelism and Concurrency

HPX is a general purpose C++ runtime system for parallel and distributed applications that has been under active development since 2007. HPX exposes a uniform, standards-oriented API, and keeps abreast of the latest standards and proposals. It is used in a wide variety of high-performance applications.
- -The sender/receiver implementation in HPX has been under active development since May 2020. It is used to erase the overhead of futures and to make it possible to write efficient generic asynchronous algorithms that are agnostic to their execution resource. In HPX, algorithms can migrate execution between execution resources, even to GPUs and back, using a uniform standard interface with sender/receiver. - -Far and away, the HPX team has the greatest usage experience outside Facebook. Mikael Simberg summarizes the experience as follows: - -Summarizing, for us the major benefits of sender/receiver compared to the old model are: - -Proper hooks for transitioning between execution resources. - -The adaptors. Things like let_value are really nice additions. - -Separation of the error channel from the value channel (also cancellation, but we don’t have much use for it at the moment). Even from a teaching perspective having to explain that the future f2 in the continuation will always be ready here f1.then([](future f2) {...}) is enough of a reason to separate the channels. All the other obvious reasons apply as well of course. - -For futures we have a thing called hpx::dataflow which is an optimized version of when_all(...).then(...) which avoids intermediate allocations. With the sender/receiver when_all(...) | then(...) we get that "for free". - -kuhllib by Dietmar Kuehl - -This is a prototype Standard Template Library with an implementation of sender/receiver that has been under development since May, 2021. It is significant mostly for its support for sender/receiver-based networking interfaces. - -Here, Dietmar Kuehl speaks about the perceived complexity of sender/receiver: - -... 
and, also similar to STL: as I had tried to do things in that space before I recognize sender/receivers as being maybe complicated in one way but a huge simplification in another one: like with STL I think those who use it will benefit - if not from the algorithm from the clarity of abstraction: the separation of concerns of STL (the algorithm being detached from the details of the sequence representation) is a major leap. Here it is rather similar: the separation of the asynchronous algorithm from the details of execution. Sure, there is some glue to tie things back together but each of them is simpler than the combined result. - -Elsewhere, he said: - -... to me it feels like sender/receivers are like iterators when STL emerged: they are different from what everybody did in that space. However, everything people are already doing in that space isn’t right. - -Kuehl also has experience teaching sender/receiver at Bloomberg. About that experience he says: - -When I asked [my students] specifically about how complex they consider the sender/receiver stuff the feedback was quite unanimous that the sender/receiver parts aren’t trivial but not what contributes to the complexity. - -C++ Bare Metal Senders and Receivers from Intel - -This is a prototype implementation of sender/receiver by Intel that has been under development since August, 2023. It is significant mostly for its support for bare metal (no operating system) and embedded systems, a domain for which senders are particularly well-suited due to their very low dynamic memory requirements. - -1.10.4. Inspirations -This proposal also draws heavily from our experience with Thrust and Agency. It is also inspired by the needs of countless other C++ frameworks for asynchrony, parallelism, and concurrency, including: - -HPX - -Folly - -stlab - -2. Revision history -2.1. 
R10 -The changes since R9 are as follows: - -Fixes: - -Fixed connect and get_completion_signatures to use transform_sender, as "Sender Algorithm Customization" proposed (but failed) to do. See "Fixing Lazy Sender Algorithm Customization" for details. - -ensure_started, start_detached, execute, and execute_may_block_caller are removed from the proposal. They are to be replaced with safer and more structured APIs by "async_scope — Creating scopes for non-sequential concurrency". See "remove ensure_started and start_detached from P2300" for details. - -Fixed a logic error in the specification of split that could have caused a receiver to be completed twice in some cases. - -Fixed stopped_as_optional to handle the case where the child sender completes with more than one value, in which case the stopped_as_optional sender completes with an optional of a tuple of the values. - -The queryable, stoppable_source, and stoppable_callback_for concepts have been made exposition-only. - -Enhancements: - -The operation_state concept no longer requires that operation states model queryable. - -The get_delegatee_scheduler query has been renamed to get_delegation_scheduler. - -The read environment has been renamed to read_env. - -The nullary forms of the queries which returned instances of the read_env sender have been removed. That is, get_scheduler() is no longer another way to spell read_env(get_scheduler). Same for the other queries. - -A feature test macro has been added: __cpp_lib_senders. - -transfer has been renamed to continues_on. on has been renamed to starts_on. A new on algorithm has been added that is a combination of starts_on and continues_on for performing work on a different context and automatically transitioning back to the starting one. See "Reconsidering the std::execution::on algorithm" for details. 
- -An exposition-only simple-allocator concept is added to the Library introduction ([library]), and the specification of the get_allocator query is expressed in terms of it. - -An exposition-only write-env sender adaptor has been added for use in the implementation of the new on algorithm. - -2.2. R9 -The changes since R8 are as follows: - -Fixes: - -The tag_invoke mechanism has been replaced with member functions for customizations as per "Member customization points for Senders and Receivers". - -Per guidance from LWG and LEWG, receiver_adaptor has been removed. - -The receiver concept is tweaked to require that receiver types are not final. Without receiver_adaptor and tag_invoke, receiver adaptors are easily written using implementation inheritance. - -std::tag_t is made exposition-only. - -The types in_place_stop_token, in_place_stop_source, and in_place_stop_callback are renamed to inplace_stop_token, inplace_stop_source, and inplace_stop_callback, respectively. - -Enhancements: - -The specification of the sync_wait algorithm has been updated for clarity. - -The specification of all the stop token, source, and callback types have been re-expressed in terms of shared concepts. - -Declarations are shown in their proper namespaces. - -Editorial changes have been made to clarify what text is added, what is removed, and what is an editorial note. - -The section numbers of the proposed wording now match the section numbers in the working draft of the C++ standard. - -2.3. R8 -The changes since R7 are as follows: - -Fixes: - -get_env(obj) is required to be nothrow. - -get_env and the associated environment utilities are moved back into std::execution from std::. - -make_completion_signatures is renamed transform_completion_signatures_of and is expressed in terms of the new transform_completion_signatures, which takes an input set of completion signatures instead of a sender and an environment. 
- -Add a requirement on queryable objects that if tag_invoke(query, env, args...) is well-formed, then query(env, args...) is expression-equivalent to it. This is necessary to properly specify how to join two environments in the presence of queries that have defaults. - -The sender_in concept requires that E satisfies queryable. - -Senders of more than one value are now co_await-able in coroutines, the result of which is a std::tuple of the values (which is suitable as the initializer of a structured binding). - -Enhancements: - -The exposition-only class template basic-sender is greatly enhanced, and the sender algorithms are respecified in term of it. - -enable_sender and enable_receiver traits now have default implementations that look for nested sender_concept and receiver_concept types, respectively. - -2.4. R7 -The changes since R6 are as follows: - -Fixes: - -Make it valid to pass non-variadic templates to the exposition-only alias template gather-signatures, fixing the definitions of value_types_of_t, error_types_of_t, and the exposition-only alias template sync-wait-result-type. - -Removed the query forwarding from receiver_adaptor that was inadvertantly left over from a previous edit. - -When adapting a sender to an awaitable with as_awaitable, the sender’s value result datum is decayed before being stored in the exposition-only variant. - -Correctly specify the completion signatures of the schedule_from algorithm. - -The sender_of concept no longer distinguishes between a sender of a type T and a sender of a type T&&. - -The just and just_error sender factories now reject C-style arrays instead of silently decaying them to pointers. - -Enhancements: - -The sender and receiver concepts get explicit opt-in traits called enable_sender and enable_receiver, respectively. The traits have default implementations that look for nested is_sender and is_receiver types, respectively. - -get_attrs is removed and get_env is used in its place. 
- -The exposition-only type empty-env is made normative and is renamed empty_env. - -get_env gets a fall-back implementation that simply returns empty_env{} if a tag_invoke overload is not found. - -get_env is required to be insensitive to the cvref-qualification of its argument. - -get_env, empty_env, and env_of_t are moved into the std:: namespace. - -Add a new subclause describing the async programming model of senders in abstract terms. See § 34.3 Asynchronous operations [async.ops]. - -2.5. R6 -The changes since R5 are as follows: - -Fixes: - -Fix typo in the specification of in_place_stop_source about the relative lifetimes of the tokens and the source that produced them. - -get_completion_signatures tests for awaitability with a promise type similar to the one used by connect for the sake of consistency. - -A coroutine promise type is an environment provider (that is, it implements get_env()) rather than being directly queryable. The previous draft was inconsistent about that. - -Enhancements: - -Sender queries are moved into a separate queryable "attributes" object that is accessed by passing the sender to get_attrs() (see below). The sender concept is reexpressed to require get_attrs() and separated from a new sender_in concept for checking whether a type is a sender within a particular execution environment. - -The placeholder types no_env and dependent_completion_signatures<> are no longer needed and are dropped. - -ensure_started and split are changed to persist the result of calling get_attrs() on the input sender. - -Reorder constraints of the scheduler and receiver concepts to avoid constraint recursion when used in tandem with poorly-constrained, implicitly convertible types. - -Re-express the sender_of concept to be more ergonomic and general. 
- -Make the specification of the alias templates value_types_of_t and error_types_of_t, and the variable template sends_done more concise by expressing them in terms of a new exposition-only alias template gather-signatures. - -2.5.1. Environments and attributes -In earlier revisions, receivers, senders, and schedulers all were directly queryable. In R4, receiver queries were moved into a separate "environment" object, obtainable from a receiver with a get_env accessor. In R6, the sender queries are given similar treatment, relocating to a "attributes" object obtainable from a sender with a get_attrs accessor. This was done to solve a number of design problems with the split and ensure_started algorithms; e.g., see NVIDIA/stdexec#466. - -Schedulers, however, remain directly queryable. As lightweight handles that are required to be movable and copyable, there is little reason to want to dispose of a scheduler and yet persist the scheduler’s queries. - -This revision also makes operation states directly queryable, even though there isn’t yet a use for such. Some early prototypes of cooperative bulk parallel sender algorithms done at NVIDIA suggest the utility of forwardable operation state queries. The authors chose to make opstates directly queryable since the opstate object is itself required to be kept alive for the duration of asynchronous operation. - -2.6. R5 -The changes since R4 are as follows: - -Fixes: - -start_detached requires its argument to be a void sender (sends no values to set_value). - -Enhancements: - -Receiver concepts refactored to no longer require an error channel for exception_ptr or a stopped channel. - -sender_of concept and connect customization point additionally require that the receiver is capable of receiving all of the sender’s possible completions. - -get_completion_signatures is now required to return an instance of either completion_signatures or dependent_completion_signatures. - -make_completion_signatures made more general. 
- -receiver_adaptor handles get_env as it does the set_* members; that is, receiver_adaptor will look for a member named get_env() in the derived class, and if found dispatch the get_env_t tag invoke customization to it. - -just, just_error, just_stopped, and into_variant have been respecified as customization point objects instead of functions, following LEWG guidance. - -2.7. R4 -The changes since R3 are as follows: - -Fixes: - -Fix specification of get_completion_scheduler on the transfer, schedule_from and transfer_when_all algorithms; the completion scheduler cannot be guaranteed for set_error. - -The value of sends_stopped for the default sender traits of types that are generally awaitable was changed from false to true to acknowledge the fact that some coroutine types are generally awaitable and may implement the unhandled_stopped() protocol in their promise types. - -Fix the incorrect use of inline namespaces in the header. - -Shorten the stable names for the sections. - -sync_wait now handles std::error_code specially by throwing a std::system_error on failure. - -Fix how ADL isolation from class template arguments is specified so it doesn’t constrain implmentations. - -Properly expose the tag types in the header synopsis. - -Enhancements: - -Support for "dependently-typed" senders, where the completion signatures -- and thus the sender metadata -- depend on the type of the receiver connected to it. See the section dependently-typed senders below for more information. - -Add a read(query) sender factory for issuing a query against a receiver and sending the result through the value channel. (This is a useful instance of a dependently-typed sender.) - -Add completion_signatures utility for declaratively defining a typed sender’s metadata. - -Add make_completion_signatures utility for specifying a sender’s completion signatures by adapting those of another sender. - -Drop support for untyped senders and rename typed_sender to sender. 
- -set_done is renamed to set_stopped. All occurances of "done" in indentifiers replaced with "stopped" - -Add customization points for controlling the forwarding of scheduler, sender, receiver, and environment queries through layers of adaptors; specify the behavior of the standard adaptors in terms of the new customization points. - -Add get_delegatee_scheduler query to forward a scheduler that can be used by algorithms or by the scheduler to delegate work and forward progress. - -Add schedule_result_t alias template. - -More precisely specify the sender algorithms, including precisely what their completion signatures are. - -stopped_as_error respecified as a customization point object. - -tag_invoke respecified to improve diagnostics. - -2.7.1. Dependently-typed senders -Background: - -In the sender/receiver model, as with coroutines, contextual information about the current execution is most naturally propagated from the consumer to the producer. In coroutines, that means information like stop tokens, allocators and schedulers are propagated from the calling coroutine to the callee. In sender/receiver, that means that that contextual information is associated with the receiver and is queried by the sender and/or operation state after the sender and the receiver are connect-ed. - -Problem: - -The implication of the above is that the sender alone does not have all the information about the async computation it will ultimately initiate; some of that information is provided late via the receiver. However, the sender_traits mechanism, by which an algorithm can introspect the value and error types the sender will propagate, only accepts a sender parameter. It does not take into consideration the type information that will come in late via the receiver. The effect of this is that some senders cannot be typed senders when they otherwise could be. 
- -Example: - -To get concrete, consider the case of the "get_scheduler()" sender: when connect-ed and start-ed, it queries the receiver for its associated scheduler and passes it back to the receiver through the value channel. That sender’s "value type" is the type of the receiver’s scheduler. What then should sender_traits::value_types report for the get_scheduler()'s value type? It can’t answer because it doesn’t know. - -This causes knock-on problems since some important algorithms require a typed sender, such as sync_wait. To illustrate the problem, consider the following code: - -namespace ex = std::execution; - -ex::sender auto task = - ex::let_value( - ex::get_scheduler(), // Fetches scheduler from receiver. - [](auto current_sched) { - // Lauch some nested work on the current scheduler: - return ex::starts_on(current_sched, nested work...); - }); - -std::this_thread::sync_wait(std::move(task)); -The code above is attempting to schedule some work onto the sync_wait's run_loop execution resource. But let_value only returns a typed sender when the input sender is typed. As we explained above, get_scheduler() is not typed, so task is likewise not typed. Since task isn’t typed, it cannot be passed to sync_wait which is expecting a typed sender. The above code would fail to compile. - -Solution: - -The solution is conceptually quite simple: extend the sender_traits mechanism to optionally accept a receiver in addition to the sender. The algorithms can use sender_traits to inspect the async operation’s completion-signals. The typed_sender concept would also need to take an optional receiver parameter. This is the simplest change, and it would solve the immediate problem. - -Design: - -Using the receiver type to compute the sender traits turns out to have pitfalls in practice. Many receivers make use of that type information in their implementation. It is very easy to create cycles in the type system, leading to inscrutible errors. 
The design pursued in R4 is to give receivers an associated environment object -- a bag of key/value pairs -- and to move the contextual information (schedulers, etc) out of the receiver and into the environment. The sender_traits template and the typed_sender concept, rather than taking a receiver, take an environment. This is a much more robust design. - -A further refinement of this design would be to separate the receiver and the environment entirely, passing then as separate arguments along with the sender to connect. This paper does not propose that change. - -Impact: - -This change, apart from increasing the expressive power of the sender/receiver abstraction, has the following impact: - -Typed senders become moderately more challenging to write. (The new completion_signatures and transform_completion_signatures utilities are added to ease this extra burden.) - -Sender adaptor algorithms that previously constrained their sender arguments to satisfy the typed_sender concept can no longer do so as the receiver is not available yet. This can result in type-checking that is done later, when connect is ultimately called on the resulting sender adaptor. - -Operation states that own receivers that add to or change the environment are typically larger by one pointer. It comes with the benefit of far fewer indirections to evaluate queries. - -"Has it been implemented?" - -Yes, the reference implementation, which can be found at https://github.com/NVIDIA/stdexec, has implemented this design as well as some dependently-typed senders to confirm that it works. - -Implementation experience - -Although this change has not yet been made in libunifex, the most widely adopted sender/receiver implementation, a similar design can be found in Folly’s coroutine support library. In Folly.Coro, it is possible to await a special awaitable to obtain the current coroutine’s associated scheduler (called an executor in Folly). 
- -For instance, the following Folly code grabs the current executor, schedules a task for execution on that executor, and starts the resulting (scheduled) task by enqueueing it for execution. - -// From Facebook’s Folly open source library: -template -folly::coro::Task CancellableAsyncScope::co_schedule(folly::coro::Task&& task) { - this->add(std::move(task).scheduleOn(co_await co_current_executor)); - co_return; -} -Facebook relies heavily on this pattern in its coroutine code. But as described above, this pattern doesn’t work with R3 of std::execution because of the lack of dependently-typed schedulers. The change to sender_traits in R4 rectifies that. - -Why now? - -The authors are loathe to make any changes to the design, however small, at this stage of the C++23 release cycle. But we feel that, for a relatively minor design change -- adding an extra template parameter to sender_traits and typed_sender -- the returns are large enough to justify the change. And there is no better time to make this change than as early as possible. - -One might wonder why this missing feature not been added to sender/receiver before now. The designers of sender/receiver have long been aware of the need. What was missing was a clean, robust, and simple design for the change, which we now have. - -Drive-by: - -We took the opportunity to make an additional drive-by change: Rather than providing the sender traits via a class template for users to specialize, we changed it into a sender query: get_completion_signatures(sender, env). That function’s return type is used as the sender’s traits. The authors feel this leads to a more uniform design and gives sender authors a straightforward way to make the value/error types dependent on the cv- and ref-qualification of the sender if need be. - -Details: - -Below are the salient parts of the new support for dependently-typed senders in R4: - -Receiver queries have been moved from the receiver into a separate environment object. 
- -Receivers have an associated environment. The new get_env CPO retrieves a receiver’s environment. If a receiver doesn’t implement get_env, it returns an unspecified "empty" environment -- an empty struct. - -sender_traits now takes an optional Env parameter that is used to determine the error/value types. - -The primary sender_traits template is replaced with a completion_signatures_of_t alias implemented in terms of a new get_completion_signatures CPO that dispatches with tag_invoke. get_completion_signatures takes a sender and an optional environment. A sender can customize this to specify its value/error types. - -Support for untyped senders is dropped. The typed_sender concept has been renamed to sender and now takes an optional environment. - -The environment argument to the sender concept and the get_completion_signatures CPO defaults to no_env. All environment queries fail (are ill-formed) when passed an instance of no_env. - -A type S is required to satisfy sender to be considered a sender. If it doesn’t know what types it will complete with independent of an environment, it returns an instance of the placeholder traits dependent_completion_signatures. - -If a sender satisfies both sender and sender, then the completion signatures for the two cannot be different in any way. It is possible for an implementation to enforce this statically, but not required. - -All of the algorithms and examples have been updated to work with dependently-typed senders. - -2.8. R3 -The changes since R2 are as follows: - -Fixes: - -Fix specification of the starts_on algorithm to clarify lifetimes of intermediate operation states and properly scope the get_scheduler query. - -Fix a memory safety bug in the implementation of connect-awaitable. - -Fix recursive definition of the scheduler concept. - -Enhancements: - -Add run_loop execution resource. - -Add receiver_adaptor utility to simplify writing receivers. 
- -Require a scheduler’s sender to model sender_of and provide a completion scheduler. - -Specify the cancellation scope of the when_all algorithm. - -Make as_awaitable a customization point. - -Change connect's handling of awaitables to consider those types that are awaitable owing to customization of as_awaitable. - -Add value_types_of_t and error_types_of_t alias templates; rename stop_token_type_t to stop_token_of_t. - -Add a design rationale for the removal of the possibly eager algorithms. - -Expand the section on field experience. - -2.9. R2 -The changes since R1 are as follows: - -Remove the eagerly executing sender algorithms. - -Extend the execution::connect customization point and the sender_traits<> template to recognize awaitables as typed_senders. - -Add utilities as_awaitable() and with_awaitable_senders<> so a coroutine type can trivially make senders awaitable with a coroutine. - -Add a section describing the design of the sender/awaitable interactions. - -Add a section describing the design of the cancellation support in sender/receiver. - -Add a section showing examples of simple sender adaptor algorithms. - -Add a section showing examples of simple schedulers. - -Add a few more examples: a sudoku solver, a parallel recursive file copy, and an echo server. - -Refined the forward progress guarantees on the bulk algorithm. - -Add a section describing how to use a range of senders to represent async sequences. - -Add a section showing how to use senders to represent partial success. - -Add sender factories execution::just_error and execution::just_stopped. - -Add sender adaptors execution::stopped_as_optional and execution::stopped_as_error. - -Document more production uses of sender/receiver at scale. - -Various fixes of typos and bugs. - -2.10. R1 -The changes since R0 are as follows: - -Added a new concept, sender_of. - -Added a new scheduler query, this_thread::execute_may_block_caller. 
- -Added a new scheduler query, get_forward_progress_guarantee. - -Removed the unschedule adaptor. - -Various fixes of typos and bugs. - -2.11. R0 -Initial revision. - -3. Design - introduction -The following three sections describe the entirety of the proposed design. - -§ 3 Design - introduction describes the conventions used through the rest of the design sections, as well as an example illustrating how we envision code will be written using this proposal. - -§ 4 Design - user side describes all the functionality from the perspective we intend for users: it describes the various concepts they will interact with, and what their programming model is. - -§ 5 Design - implementer side describes the machinery that allows for that programming model to function, and the information contained there is necessary for people implementing senders and sender algorithms (including the standard library ones) - but is not necessary to use senders productively. - -3.1. Conventions -The following conventions are used throughout the design section: - -The namespace proposed in this paper is the same as in A Unified Executors Proposal for C++: std::execution; however, for brevity, the std:: part of this name is omitted. When you see execution::foo, treat that as std::execution::foo. - -Universal references and explicit calls to std::move/std::forward are omitted in code samples and signatures for simplicity; assume universal references and perfect forwarding unless stated otherwise. - -None of the names proposed here are names that we are particularly attached to; consider the names to be reasonable placeholders that can freely be changed, should the committee want to do so. - -3.2. Queries and algorithms -A query is a callable that takes some set of objects (usually one) as parameters and returns facts about those objects without modifying them. Queries are usually customization point objects, but in some cases may be functions. 
- -An algorithm is a callable that takes some set of objects as parameters and causes those objects to do something. Algorithms are usually customization point objects, but in some cases may be functions. - -4. Design - user side -4.1. Execution resources describe the place of execution -An execution resource is a resource that represents the place where execution will happen. This could be a concrete resource - like a specific thread pool object, or a GPU - or a more abstract one, like the current thread of execution. Execution contexts don’t need to have a representation in code; they are simply a term describing certain properties of execution of a function. - -4.2. Schedulers represent execution resources -A scheduler is a lightweight handle that represents a strategy for scheduling work onto an execution resource. Since execution resources don’t necessarily manifest in C++ code, it’s not possible to program directly against their API. A scheduler is a solution to that problem: the scheduler concept is defined by a single sender algorithm, schedule, which returns a sender that will complete on an execution resource determined by the scheduler. Logic that you want to run on that context can be placed in the receiver’s completion-signalling method. - -execution::scheduler auto sch = thread_pool.scheduler(); -execution::sender auto snd = execution::schedule(sch); -// snd is a sender (see below) describing the creation of a new execution resource -// on the execution resource associated with sch -Note that a particular scheduler type may provide other kinds of scheduling operations which are supported by its associated execution resource. It is not limited to scheduling purely using the execution::schedule API. - -Future papers will propose additional scheduler concepts that extend scheduler to add other capabilities. For example: - -A time_scheduler concept that extends scheduler to support time-based scheduling. 
Such a concept might provide access to schedule_after(sched, duration), schedule_at(sched, time_point) and now(sched) APIs. - -Concepts that extend scheduler to support opening, reading and writing files asynchronously. - -Concepts that extend scheduler to support connecting, sending data and receiving data over the network asynchronously. - -4.3. Senders describe work -A sender is an object that describes work. Senders are similar to futures in existing asynchrony designs, but unlike futures, the work that is being done to arrive at the values they will send is also directly described by the sender object itself. A sender is said to send some values if a receiver connected (see § 5.3 execution::connect) to that sender will eventually receive said values. - -The primary defining sender algorithm is § 5.3 execution::connect; this function, however, is not a user-facing API; it is used to facilitate communication between senders and various sender algorithms, but end user code is not expected to invoke it directly. - -The way user code is expected to interact with senders is by using sender algorithms. This paper proposes an initial set of such sender algorithms, which are described in § 4.4 Senders are composable through sender algorithms, § 4.19 User-facing sender factories, § 4.20 User-facing sender adaptors, and § 4.21 User-facing sender consumers. For example, here is how a user can create a new sender on a scheduler, attach a continuation to it, and then wait for execution of the continuation to complete: - -execution::scheduler auto sch = thread_pool.scheduler(); -execution::sender auto snd = execution::schedule(sch); -execution::sender auto cont = execution::then(snd, []{ - std::fstream file{ "result.txt" }; - file << compute_result; -}); - -this_thread::sync_wait(cont); -// at this point, cont has completed execution -4.4. 
Senders are composable through sender algorithms -Asynchronous programming often departs from traditional code structure and control flow that we are familiar with. A successful asynchronous framework must provide an intuitive story for composition of asynchronous work: expressing dependencies, passing objects, managing object lifetimes, etc. - -The true power and utility of senders is in their composability. With senders, users can describe generic execution pipelines and graphs, and then run them on and across a variety of different schedulers. Senders are composed using sender algorithms: - -sender factories, algorithms that take no senders and return a sender. - -sender adaptors, algorithms that take (and potentially execution::connect) senders and return a sender. - -sender consumers, algorithms that take (and potentially execution::connect) senders and do not return a sender. - -4.5. Senders can propagate completion schedulers -One of the goals of executors is to support a diverse set of execution resources, including traditional thread pools, task and fiber frameworks (like HPX Legion), and GPUs and other accelerators (managed by runtimes such as CUDA or SYCL). On many of these systems, not all execution agents are created equal and not all functions can be run on all execution agents. Having precise control over the execution resource used for any given function call being submitted is important on such systems, and the users of standard execution facilities will expect to be able to express such requirements. - -A Unified Executors Proposal for C++ was not always clear about the place of execution of any given piece of code. Precise control was present in the two-way execution API present in earlier executor designs, but it has so far been missing from the senders design. 
There has been a proposal (Towards C++23 executors: A proposal for an initial set of algorithms) to provide a number of sender algorithms that would enforce certain rules on the places of execution of the work described by a sender, but we have found those sender algorithms to be insufficient for achieving the best performance on all platforms that are of interest to us. The implementation strategies that we are aware of result in one of the following situations: - -trying to submit work to one execution resource (such as a CPU thread pool) from another execution resource (such as a GPU or a task framework), which assumes that all execution agents are as capable as a std::thread (which they aren’t). - -forcibly interleaving two adjacent execution graph nodes that are both executing on one execution resource (such as a GPU) with glue code that runs on another execution resource (such as a CPU), which is prohibitively expensive for some execution resources (such as CUDA or SYCL). - -having to customise most or all sender algorithms to support an execution resource, so that you can avoid problems described in 1. and 2, which we believe is impractical and brittle based on months of field experience attempting this in Agency. - -None of these implementation strategies are acceptable for many classes of parallel runtimes, such as task frameworks (like HPX) or accelerator runtimes (like CUDA or SYCL). - -Therefore, in addition to the starts_on sender algorithm from Towards C++23 executors: A proposal for an initial set of algorithms, we are proposing a way for senders to advertise what scheduler (and by extension what execution resource) they will complete on. Any given sender may have completion schedulers for some or all of the signals (value, error, or stopped) it completes with (for more detail on the completion-signals, see § 5.1 Receivers serve as glue between senders). 
When further work is attached to that sender by invoking sender algorithms, that work will also complete on an appropriate completion scheduler. - -4.5.1. execution::get_completion_scheduler -get_completion_scheduler is a query that retrieves the completion scheduler for a specific completion-signal from a sender’s environment. For a sender that lacks a completion scheduler query for a given signal, calling get_completion_scheduler is ill-formed. If a sender advertises a completion scheduler for a signal in this way, that sender must ensure that it sends that signal on an execution agent belonging to an execution resource represented by a scheduler returned from this function. See § 4.5 Senders can propagate completion schedulers for more details. - -execution::scheduler auto cpu_sched = new_thread_scheduler{}; -execution::scheduler auto gpu_sched = cuda::scheduler(); - -execution::sender auto snd0 = execution::schedule(cpu_sched); -execution::scheduler auto completion_sch0 = - execution::get_completion_scheduler(get_env(snd0)); -// completion_sch0 is equivalent to cpu_sched - -execution::sender auto snd1 = execution::then(snd0, []{ - std::cout << "I am running on cpu_sched!\n"; -}); -execution::scheduler auto completion_sch1 = - execution::get_completion_scheduler(get_env(snd1)); -// completion_sch1 is equivalent to cpu_sched - -execution::sender auto snd2 = execution::continues_on(snd1, gpu_sched); -execution::sender auto snd3 = execution::then(snd2, []{ - std::cout << "I am running on gpu_sched!\n"; -}); -execution::scheduler auto completion_sch3 = - execution::get_completion_scheduler(get_env(snd3)); -// completion_sch3 is equivalent to gpu_sched -4.6. Execution resource transitions are explicit -A Unified Executors Proposal for C++ does not contain any mechanisms for performing an execution resource transition. 
The only sender algorithm that can create a sender that will move execution to a specific execution resource is execution::schedule, which does not take an input sender. That means that there’s no way to construct sender chains that traverse different execution resources. This is necessary to fulfill the promise of senders being able to replace two-way executors, which had this capability. - -We propose that, for senders advertising their completion scheduler, all execution resource transitions must be explicit; running user code anywhere but where they defined it to run must be considered a bug. - -The execution::continues_on sender adaptor performs a transition from one execution resource to another: - -execution::scheduler auto sch1 = ...; -execution::scheduler auto sch2 = ...; - -execution::sender auto snd1 = execution::schedule(sch1); -execution::sender auto then1 = execution::then(snd1, []{ - std::cout << "I am running on sch1!\n"; -}); - -execution::sender auto snd2 = execution::continues_on(then1, sch2); -execution::sender auto then2 = execution::then(snd2, []{ - std::cout << "I am running on sch2!\n"; -}); - -this_thread::sync_wait(then2); -4.7. Senders can be either multi-shot or single-shot -Some senders may only support launching their operation a single time, while others may be repeatable and support being launched multiple times. Executing the operation may consume resources owned by the sender. - -For example, a sender may contain a std::unique_ptr that it will be transferring ownership of to the operation-state returned by a call to execution::connect so that the operation has access to this resource. In such a sender, calling execution::connect consumes the sender such that after the call the input sender is no longer valid. Such a sender will also typically be move-only so that it can maintain unique ownership of that resource. - -A single-shot sender can only be connected to a receiver at most once. 
Its implementation of execution::connect only has overloads for an rvalue-qualified sender. Callers must pass the sender as an rvalue to the call to execution::connect, indicating that the call consumes the sender. - -A multi-shot sender can be connected to multiple receivers and can be launched multiple times. Multi-shot senders customise execution::connect to accept an lvalue reference to the sender. Callers can indicate that they want the sender to remain valid after the call to execution::connect by passing an lvalue reference to the sender to call these overloads. Multi-shot senders should also define overloads of execution::connect that accept rvalue-qualified senders to allow the sender to be also used in places where only a single-shot sender is required. - -If the user of a sender does not require the sender to remain valid after connecting it to a receiver then it can pass an rvalue-reference to the sender to the call to execution::connect. Such usages should be able to accept either single-shot or multi-shot senders. - -If the caller does wish for the sender to remain valid after the call then it can pass an lvalue-qualified sender to the call to execution::connect. Such usages will only accept multi-shot senders. - -Algorithms that accept senders will typically either decay-copy an input sender and store it somewhere for later usage (for example as a data-member of the returned sender) or will immediately call execution::connect on the input sender, such as in this_thread::sync_wait. - -Some multi-use sender algorithms may require that an input sender be copy-constructible but will only call execution::connect on an rvalue of each copy, which still results in effectively executing the operation multiple times. Other multi-use sender algorithms may require that the sender is move-constructible but will invoke execution::connect on an lvalue reference to the sender. 
- -For a sender to be usable in both multi-use scenarios, it will generally be required to be both copy-constructible and lvalue-connectable. - -4.8. Senders are forkable -Any non-trivial program will eventually want to fork a chain of senders into independent streams of work, regardless of whether they are single-shot or multi-shot. For instance, an incoming event to a middleware system may be required to trigger events on more than one downstream system. This requires that we provide well defined mechanisms for making sure that connecting a sender multiple times is possible and correct. - -The split sender adaptor facilitates connecting to a sender multiple times, regardless of whether it is single-shot or multi-shot: - -auto some_algorithm(execution::sender auto&& input) { - execution::sender auto multi_shot = split(input); - // "multi_shot" is guaranteed to be multi-shot, - // regardless of whether "input" was multi-shot or not - - return when_all( - then(multi_shot, [] { std::cout << "First continuation\n"; }), - then(multi_shot, [] { std::cout << "Second continuation\n"; }) - ); -} -4.9. Senders support cancellation -Senders are often used in scenarios where the application may be concurrently executing multiple strategies for achieving some program goal. When one of these strategies succeeds (or fails) it may not make sense to continue pursuing the other strategies as their results are no longer useful. - -For example, we may want to try to simultaneously connect to multiple network servers and use whichever server responds first. Once the first server responds we no longer need to continue trying to connect to the other servers. - -Ideally, in these scenarios, we would somehow be able to request that those other strategies stop executing promptly so that their resources (e.g. cpu, memory, I/O bandwidth) can be released and used for other work. 
-
-While the design of senders has support for cancelling an operation before it starts by simply destroying the sender or the operation-state returned from execution::connect() before calling execution::start(), there also needs to be a standard, generic mechanism to ask for an already-started operation to complete early.
-
-The ability to be able to cancel in-flight operations is fundamental to supporting some kinds of generic concurrency algorithms.
-
-For example:
-
-a when_all(ops...) algorithm should cancel other operations as soon as one operation fails
-
-a first_successful(ops...) algorithm should cancel the other operations as soon as one operation completes successfully
-
-a generic timeout(src, duration) algorithm needs to be able to cancel the src operation after the timeout duration has elapsed.
-
-a stop_when(src, trigger) algorithm should cancel src if trigger completes first and cancel trigger if src completes first
-
-The mechanism used for communicating cancellation-requests, or stop-requests, needs to have a uniform interface so that generic algorithms that compose sender-based operations, such as the ones listed above, are able to communicate these cancellation requests to senders that they don’t know anything about.
-
-The design is intended to be composable so that cancellation of higher-level operations can propagate those cancellation requests through intermediate layers to lower-level operations that need to actually respond to the cancellation requests. 
- -For example, we can compose the algorithms mentioned above so that child operations are cancelled when any one of the multiple cancellation conditions occurs: - -sender auto composed_cancellation_example(auto query) { - return stop_when( - timeout( - when_all( - first_successful( - query_server_a(query), - query_server_b(query)), - load_file("some_file.jpg")), - 5s), - cancelButton.on_click()); -} -In this example, if we take the operation returned by query_server_b(query), this operation will receive a stop-request when any of the following happens: - -first_successful algorithm will send a stop-request if query_server_a(query) completes successfully - -when_all algorithm will send a stop-request if the load_file("some_file.jpg") operation completes with an error or stopped result. - -timeout algorithm will send a stop-request if the operation does not complete within 5 seconds. - -stop_when algorithm will send a stop-request if the user clicks on the "Cancel" button in the user-interface. - -The parent operation consuming the composed_cancellation_example() sends a stop-request - -Note that within this code there is no explicit mention of cancellation, stop-tokens, callbacks, etc. yet the example fully supports and responds to the various cancellation sources. - -The intent of the design is that the common usage of cancellation in sender/receiver-based code is primarily through use of concurrency algorithms that manage the detailed plumbing of cancellation for you. Much like algorithms that compose senders relieve the user from having to write their own receiver types, algorithms that introduce concurrency and provide higher-level cancellation semantics relieve the user from having to deal with low-level details of cancellation. - -4.9.1. 
Cancellation design summary -The design of cancellation described in this paper is built on top of and extends the std::stop_token-based cancellation facilities added in C++20, first proposed in Composable cancellation for sender-based async operations. - -At a high-level, the facilities proposed by this paper for supporting cancellation include: - -Add a std::stoppable_token concept that generalises the interface of the std::stop_token type to allow other stop token types with different implementation strategies. - -Add std::unstoppable_token concept for detecting whether a stoppable_token can never receive a stop-request. - -Add std::inplace_stop_token, std::inplace_stop_source and std::inplace_stop_callback types that provide a more efficient implementation of a stop-token for use in structured concurrency situations. - -Add std::never_stop_token for use in places where you never want to issue a stop-request. - -Add std::execution::get_stop_token() CPO for querying the stop-token to use for an operation from its receiver’s execution environment. - -Add std::execution::stop_token_of_t for querying the type of a stop-token returned from get_stop_token(). - -In addition, there are requirements added to some of the algorithms to specify what their cancellation behaviour is and what the requirements of customisations of those algorithms are with respect to cancellation. - -The key component that enables generic cancellation within sender-based operations is the execution::get_stop_token() CPO. This CPO takes a single parameter, which is the execution environment of the receiver passed to execution::connect, and returns a std::stoppable_token that the operation can use to check for stop-requests for that operation. 
-
-As the caller of execution::connect typically has control over the receiver type it passes, it is able to customise the std::execution::get_env() CPO for that receiver to return an execution environment that hooks the execution::get_stop_token() CPO to return a stop-token that the receiver has control over and that it can use to communicate a stop-request to the operation once it has started.
-
-4.9.2. Support for cancellation is optional
-Support for cancellation is optional, both on part of the author of the receiver and on part of the author of the sender.
-
-If the receiver’s execution environment does not customise the execution::get_stop_token() CPO then invoking the CPO on that receiver’s environment will invoke the default implementation which returns std::never_stop_token. This is a special stoppable_token type that is statically known to always return false from the stop_possible() method.
-
-Sender code that tries to use this stop-token will in general result in code that handles stop-requests being compiled out and having little to no run-time overhead.
-
-If the sender doesn’t call execution::get_stop_token(), for example because the operation does not support cancellation, then it will simply not respond to stop-requests from the caller.
-
-Note that stop-requests are generally racy in nature as there is often a race between an operation completing naturally and the stop-request being made. If the operation has already completed or is past the point at which it can be cancelled when the stop-request is sent then the stop-request may just be ignored. An application will typically need to be able to cope with senders that might ignore a stop-request anyway.
-
-4.9.3. Cancellation is inherently racy
-Usually, an operation will attach a stop callback at some point inside the call to execution::start() so that a subsequent stop-request will interrupt the logic.
-
-A stop-request can be issued concurrently from another thread. 
This means the implementation of execution::start() needs to be careful to ensure that, once a stop callback has been registered, that there are no data-races between a potentially concurrently-executing stop callback and the rest of the execution::start() implementation. - -An implementation of execution::start() that supports cancellation will generally need to perform (at least) two separate steps: launch the operation, subscribe a stop callback to the receiver’s stop-token. Care needs to be taken depending on the order in which these two steps are performed. - -If the stop callback is subscribed first and then the operation is launched, care needs to be taken to ensure that a stop-request that invokes the stop callback on another thread after the stop callback is registered but before the operation finishes launching does not either result in a missed cancellation request or a data-race. e.g. by performing an atomic write after the launch has finished executing - -If the operation is launched first and then the stop callback is subscribed, care needs to be taken to ensure that if the launched operation completes concurrently on another thread that it does not destroy the operation-state until after the stop callback has been registered. e.g. by having the execution::start implementation write to an atomic variable once it has finished registering the stop callback and having the concurrent completion handler check that variable and either call the completion-signalling operation or store the result and defer calling the receiver’s completion-signalling operation to the execution::start() call (which is still executing). - -For an example of an implementation strategy for solving these data-races see § 1.4 Asynchronous Windows socket recv. - -4.9.4. 
Cancellation design status -This paper currently includes the design for cancellation as proposed in Composable cancellation for sender-based async operations - "Composable cancellation for sender-based async operations". P2175R0 contains more details on the background motivation and prior-art and design rationale of this design. - -It is important to note, however, that initial review of this design in the SG1 concurrency subgroup raised some concerns related to runtime overhead of the design in single-threaded scenarios and these concerns are still being investigated. - -The design of P2175R0 has been included in this paper for now, despite its potential to change, as we believe that support for cancellation is a fundamental requirement for an async model and is required in some form to be able to talk about the semantics of some of the algorithms proposed in this paper. - -This paper will be updated in the future with any changes that arise from the investigations into P2175R0. - -4.10. Sender factories and adaptors are lazy -In an earlier revision of this paper, some of the proposed algorithms supported executing their logic eagerly; i.e., before the returned sender has been connected to a receiver and started. These algorithms were removed because eager execution has a number of negative semantic and performance implications. - -We have originally included this functionality in the paper because of a long-standing belief that eager execution is a mandatory feature to be included in the standard Executors facility for that facility to be acceptable for accelerator vendors. A particular concern was that we must be able to write generic algorithms that can run either eagerly or lazily, depending on the kind of an input sender or scheduler that have been passed into them as arguments. We considered this a requirement, because the _latency_ of launching work on an accelerator can sometimes be considerable. 
-
-However, in the process of working on this paper and implementations of the features proposed within, our set of requirements has shifted, as we understood the different implementation strategies that are available for the feature set of this paper better, and, after weighing the earlier concerns against the points presented below, we have arrived at the conclusion that a purely lazy model is enough for most algorithms, and users who intend to launch work earlier may write an algorithm to achieve that goal. We have also come to deeply appreciate the fact that a purely lazy model allows both the implementation and the compiler to have a much better understanding of what the complete graph of tasks looks like, allowing them to better optimize the code - also when targeting accelerators.
-
-4.10.1. Eager execution leads to detached work or worse
-One of the questions that arises with APIs that can potentially return eagerly-executing senders is "What happens when those senders are destructed without a call to execution::connect?" or similarly, "What happens if a call to execution::connect is made, but the returned operation state is destroyed before execution::start is called on that operation state"?
-
-In these cases, the operation represented by the sender is potentially executing concurrently in another thread at the time that the destructor of the sender and/or operation-state is running. In the case that the operation has not completed executing by the time that the destructor is run we need to decide what the semantics of the destructor is.
-
-There are three main strategies that can be adopted here, none of which is particularly satisfactory:
-
-Make this undefined-behaviour - the caller must ensure that any eagerly-executing sender is always joined by connecting and starting that sender. This approach is generally pretty hostile to programmers, particularly in the presence of exceptions, since it complicates the ability to compose these operations. 
- -Eager operations typically need to acquire resources when they are first called in order to start the operation early. This makes eager algorithms prone to failure. Consider, then, what might happen in an expression such as when_all(eager_op_1(), eager_op_2()). Imagine eager_op_1() starts an asynchronous operation successfully, but then eager_op_2() throws. For lazy senders, that failure happens in the context of the when_all algorithm, which handles the failure and ensures that async work joins on all code paths. In this case though -- the eager case -- the child operation has failed even before when_all has been called. - -It then becomes the responsibility, not of the algorithm, but of the end user to handle the exception and ensure that eager_op_1() is joined before allowing the exception to propagate. If they fail to do that, they incur undefined behavior. - -Detach from the computation - let the operation continue in the background - like an implicit call to std::thread::detach(). While this approach can work in some circumstances for some kinds of applications, in general it is also pretty user-hostile; it makes it difficult to reason about the safe destruction of resources used by these eager operations. In general, detached work necessitates some kind of garbage collection; e.g., std::shared_ptr, to ensure resources are kept alive until the operations complete, and can make clean shutdown nigh impossible. - -Block in the destructor until the operation completes. This approach is probably the safest to use as it preserves the structured nature of the concurrent operations, but also introduces the potential for deadlocking the application if the completion of the operation depends on the current thread making forward progress. 
- -The risk of deadlock might occur, for example, if a thread-pool with a small number of threads is executing code that creates a sender representing an eagerly-executing operation and then calls the destructor of that sender without joining it (e.g. because an exception was thrown). If the current thread blocks waiting for that eager operation to complete and that eager operation cannot complete until some entry enqueued to the thread-pool’s queue of work is run then the thread may wait for an indefinite amount of time. If all threads of the thread-pool are simultaneously performing such blocking operations then deadlock can result. - -There are also minor variations on each of these choices. For example: - -A variation of (1): Call std::terminate if an eager sender is destructed without joining it. This is the approach that std::thread destructor takes. - -A variation of (2): Request cancellation of the operation before detaching. This reduces the chances of operations continuing to run indefinitely in the background once they have been detached but does not solve the lifetime- or shutdown-related challenges. - -A variation of (3): Request cancellation of the operation before blocking on its completion. This is the strategy that std::jthread uses for its destructor. It reduces the risk of deadlock but does not eliminate it. - -4.10.2. Eager senders complicate algorithm implementations -Algorithms that can assume they are operating on senders with strictly lazy semantics are able to make certain optimizations that are not available if senders can be potentially eager. With lazy senders, an algorithm can safely assume that a call to execution::start on an operation state strictly happens before the execution of that async operation. This frees the algorithm from needing to resolve potential race conditions. For example, consider an algorithm sequence that puts async operations in sequence by starting an operation only after the preceding one has completed. 
In an expression like sequence(a(), then(src, [] { b(); }), c()), one may reasonably assume that a(), b() and c() are sequenced and therefore do not need synchronisation. Eager algorithms break that assumption. - -When an algorithm needs to deal with potentially eager senders, the potential race conditions can be resolved one of two ways, neither of which is desirable: - -Assume the worst and implement the algorithm defensively, assuming all senders are eager. This obviously has overheads both at runtime and in algorithm complexity. Resolving race conditions is hard. - -Require senders to declare whether they are eager or not with a query. Algorithms can then implement two different implementation strategies, one for strictly lazy senders and one for potentially eager senders. This addresses the performance problem of (1) while compounding the complexity problem. - -4.10.3. Eager senders incur cancellation-related overhead -Another implication of the use of eager operations is with regards to cancellation. The eagerly executing operation will not have access to the caller’s stop token until the sender is connected to a receiver. If we still want to be able to cancel the eager operation then it will need to create a new stop source and pass its associated stop token down to child operations. Then when the returned sender is eventually connected it will register a stop callback with the receiver’s stop token that will request stop on the eager sender’s stop source. - -As the eager operation does not know at the time that it is launched what the type of the receiver is going to be, and thus whether or not the stop token returned from execution::get_stop_token is an std::unstoppable_token or not, the eager operation is going to need to assume it might be later connected to a receiver with a stop token that might actually issue a stop request. 
Thus it needs to declare space in the operation state for a type-erased stop callback and incur the runtime overhead of supporting cancellation, even if cancellation will never be requested by the caller. - -The eager operation will also need to do this to support sending a stop request to the eager operation in the case that the sender representing the eager work is destroyed before it has been joined (assuming strategy (5) or (6) listed above is chosen). - -4.10.4. Eager senders cannot access execution resource from the receiver -In sender/receiver, contextual information is passed from parent operations to their children by way of receivers. Information like stop tokens, allocators, current scheduler, priority, and deadline are propagated to child operations with custom receivers at the time the operation is connected. That way, each operation has the contextual information it needs before it is started. - -But if the operation is started before it is connected to a receiver, then there isn’t a way for a parent operation to communicate contextual information to its child operations, which may complete before a receiver is ever attached. - -4.11. Schedulers advertise their forward progress guarantees -To decide whether a scheduler (and its associated execution resource) is sufficient for a specific task, it may be necessary to know what kind of forward progress guarantees it provides for the execution agents it creates. The C++ Standard defines the following forward progress guarantees: - -concurrent, which requires that a thread makes progress eventually; - -parallel, which requires that a thread makes progress once it executes a step; and - -weakly parallel, which does not require that the thread makes progress. - -This paper introduces a scheduler query function, get_forward_progress_guarantee, which returns one of the enumerators of a new enum type, forward_progress_guarantee. 
Each enumerator of forward_progress_guarantee corresponds to one of the aforementioned guarantees. - -4.12. Most sender adaptors are pipeable -To facilitate an intuitive syntax for composition, most sender adaptors are pipeable; they can be composed (piped) together with operator|. This mechanism is similar to the operator| composition that C++ range adaptors support and draws inspiration from piping in *nix shells. Pipeable sender adaptors take a sender as their first parameter and have no other sender parameters. - -a | b will pass the sender a as the first argument to the pipeable sender adaptor b. Pipeable sender adaptors support partial application of the parameters after the first. For example, all of the following are equivalent: - -execution::bulk(snd, N, [] (std::size_t i, auto d) {}); -execution::bulk(N, [] (std::size_t i, auto d) {})(snd); -snd | execution::bulk(N, [] (std::size_t i, auto d) {}); -Piping enables you to compose together senders with a linear syntax. Without it, you’d have to use either nested function call syntax, which would cause a syntactic inversion of the direction of control flow, or you’d have to introduce a temporary variable for each stage of the pipeline. 
Consider the following example where we want to execute first on a CPU thread pool, then on a CUDA GPU, then back on the CPU thread pool: - -Syntax Style Example -Function call -(nested) -auto snd = execution::then( - execution::continues_on( - execution::then( - execution::continues_on( - execution::then( - execution::schedule(thread_pool.scheduler()), - []{ return 123; }), - cuda::new_stream_scheduler()), - [](int i){ return 123 * 5; }), - thread_pool.scheduler()), - [](int i){ return i - 5; }); -auto [result] = this_thread::sync_wait(snd).value(); -// result == 610 -Function call -(named temporaries) -auto snd0 = execution::schedule(thread_pool.scheduler()); -auto snd1 = execution::then(snd0, []{ return 123; }); -auto snd2 = execution::continues_on(snd1, cuda::new_stream_scheduler()); -auto snd3 = execution::then(snd2, [](int i){ return 123 * 5; }); -auto snd4 = execution::continues_on(snd3, thread_pool.scheduler()); -auto snd5 = execution::then(snd4, [](int i){ return i - 5; }); -auto [result] = *this_thread::sync_wait(snd5); -// result == 610 -Pipe -auto snd = execution::schedule(thread_pool.scheduler()) - | execution::then([]{ return 123; }) - | execution::continues_on(cuda::new_stream_scheduler()) - | execution::then([](int i){ return 123 * 5; }) - | execution::continues_on(thread_pool.scheduler()) - | execution::then([](int i){ return i - 5; }); -auto [result] = this_thread::sync_wait(snd).value(); -// result == 610 -Certain sender adaptors are not pipeable, because using the pipeline syntax can result in confusion of the semantics of the adaptors involved. Specifically, the following sender adaptors are not pipeable. - -execution::when_all and execution::when_all_with_variant: Since this sender adaptor takes a variadic pack of senders, a partially applied form would be ambiguous with a non partially applied form with an arity of one less. 
- -execution::starts_on: This sender adaptor changes how the sender passed to it is executed, not what happens to its result, but allowing it in a pipeline makes it read as if it performed a function more similar to continues_on. - -Sender consumers could be made pipeable, but we have chosen to not do so. However, since these are terminal nodes in a pipeline and nothing can be piped after them, we believe a pipe syntax may be confusing as well as unnecessary, as consumers cannot be chained. We believe sender consumers read better with function call syntax. - -4.13. A range of senders represents an async sequence of data -Senders represent a single unit of asynchronous work. In many cases though, what is being modeled is a sequence of data arriving asynchronously, and you want computation to happen on demand, when each element arrives. This requires nothing more than what is in this paper and the range support in C++20. A range of senders would allow you to model such input as keystrokes, mouse movements, sensor readings, or network requests. - -Given some expression R that is a range of senders, consider the following in a coroutine that returns an async generator type: - -for (auto snd : R) { - if (auto opt = co_await execution::stopped_as_optional(std::move(snd))) - co_yield fn(*std::move(opt)); - else - break; -} -This transforms each element of the asynchronous sequence R with the function fn on demand, as the data arrives. The result is a new asynchronous sequence of the transformed values. - -Now imagine that R is the simple expression views::iota(0) | views::transform(execution::just). This creates a lazy range of senders, each of which completes immediately with monotonically increasing integers. The above code churns through the range, generating a new infinite asynchronous range of values [fn(0), fn(1), fn(2), ...]. - -Far more interesting would be if R were a range of senders representing, say, user actions in a UI. 
The above code gives a simple way to respond to user actions on demand. - -4.14. Senders can represent partial success -Receivers have three ways they can complete: with success, failure, or cancellation. This begs the question of how they can be used to represent async operations that partially succeed. For example, consider an API that reads from a socket. The connection could drop after the API has filled in some of the buffer. In cases like that, it makes sense to want to report both that the connection dropped and that some data has been successfully read. - -Often in the case of partial success, the error condition is not fatal nor does it mean the API has failed to satisfy its post-conditions. It is merely an extra piece of information about the nature of the completion. In those cases, "partial success" is another way of saying "success". As a result, it is sensible to pass both the error code and the result (if any) through the value channel, as shown below: - -// Capture a buffer for read_socket_async to fill in -execution::just(array{}) - | execution::let_value([socket](array& buff) { - // read_socket_async completes with two values: an error_code and - // a count of bytes: - return read_socket_async(socket, span{buff}) - // For success (partial and full), specify the next action: - | execution::let_value([](error_code err, size_t bytes_read) { - if (err != 0) { - // OK, partial success. Decide how to deal with the partial results - } else { - // OK, full success here. - } - }); - }) -In other cases, the partial success is more of a partial failure. That happens when the error condition indicates that in some way the function failed to satisfy its post-conditions. In those cases, sending the error through the value channel loses valuable contextual information. It’s possible that bundling the error and the incomplete results into an object and passing it through the error channel makes more sense. 
In that way, generic algorithms will not miss the fact that a post-condition has not been met and react inappropriately. - -Another possibility is for an async API to return a range of senders: if the API completes with full success, full error, or cancellation, the returned range contains just one sender with the result. Otherwise, if the API partially fails (doesn’t satisfy its post-conditions, but some incomplete result is available), the returned range would have two senders: the first containing the partial result, and the second containing the error. Such an API might be used in a coroutine as follows: - -// Declare a buffer for read_socket_async to fill in -array buff; - -for (auto snd : read_socket_async(socket, span{buff})) { - try { - if (optional bytes_read = - co_await execution::stopped_as_optional(std::move(snd))) { - // OK, we read some bytes into buff. Process them here.... - } else { - // The socket read was cancelled and returned no data. React - // appropriately. - } - } catch (...) { - // read_socket_async failed to meet its post-conditions. - // Do some cleanup and propagate the error... - } -} -Finally, it’s possible to combine these two approaches when the API can both partially succeed (meeting its post-conditions) and partially fail (not meeting its post-conditions). - -4.15. All awaitables are senders -Since C++20 added coroutines to the standard, we expect that coroutines and awaitables will be how a great many will choose to express their asynchronous code. However, in this paper, we are proposing to add a suite of asynchronous algorithms that accept senders, not awaitables. One might wonder whether and how these algorithms will be accessible to those who choose coroutines instead of senders. - -In truth there will be no problem because all generally awaitable types automatically model the sender concept. The adaptation is transparent and happens in the sender customization points, which are aware of awaitables. 
(By "generally awaitable" we mean types that don’t require custom await_transform trickery from a promise type to make them awaitable.) - -For an example, imagine a coroutine type called task that knows nothing about senders. It doesn’t implement any of the sender customization points. Despite that fact, and despite the fact that the this_thread::sync_wait algorithm is constrained with the sender concept, the following would compile and do what the user wants: - -task doSomeAsyncWork(); - -int main() { - // OK, awaitable types satisfy the requirements for senders: - auto o = this_thread::sync_wait(doSomeAsyncWork()); -} -Since awaitables are senders, writing a sender-based asynchronous algorithm is trivial if you have a coroutine task type: implement the algorithm as a coroutine. If you are not bothered by the possibility of allocations and indirections as a result of using coroutines, then there is no need to ever write a sender, a receiver, or an operation state. - -4.16. Many senders can be trivially made awaitable -If you choose to implement your sender-based algorithms as coroutines, you’ll run into the issue of how to retrieve results from a passed-in sender. This is not a problem. If the coroutine type opts in to sender support -- trivial with the execution::with_awaitable_senders utility -- then a large class of senders are transparently awaitable from within the coroutine. - -For example, consider the following trivial implementation of the sender-based retry algorithm: - -template - requires single-sender // see [exec.as.awaitable] -task> retry(S s) { - for (;;) { - try { - co_return co_await s; - } catch(...) { - } - } -} -Only some senders can be made awaitable directly because of the fact that callbacks are more expressive than coroutines. An awaitable expression has a single type: the result value of the async operation. In contrast, a callback can accept multiple arguments as the result of an operation. 
What’s more, the callback can have overloaded function call signatures that take different sets of arguments. There is no way to automatically map such senders into awaitables. The with_awaitable_senders utility recognizes as awaitables those senders that send a single value of a single type. To await another kind of sender, a user would have to first map its value channel into a single value of a single type -- say, with the into_variant sender algorithm -- before co_await-ing that sender. - -4.17. Cancellation of a sender can unwind a stack of coroutines -When looking at the sender-based retry algorithm in the previous section, we can see that the value and error cases are correctly handled. But what about cancellation? What happens to a coroutine that is suspended awaiting a sender that completes by calling execution::set_stopped? - -When your task type’s promise inherits from with_awaitable_senders, what happens is this: the coroutine behaves as if an uncatchable exception had been thrown from the co_await expression. (It is not really an exception, but it’s helpful to think of it that way.) Provided that the promise types of the calling coroutines also inherit from with_awaitable_senders, or more generally implement a member function called unhandled_stopped, the exception unwinds the chain of coroutines as if an exception were thrown except that it bypasses catch(...) clauses. - -In order to "catch" this uncatchable stopped exception, one of the calling coroutines in the stack would have to await a sender that maps the stopped channel into either a value or an error. That is achievable with the execution::let_stopped, execution::upon_stopped, execution::stopped_as_optional, or execution::stopped_as_error sender adaptors. 
For instance, we can use execution::stopped_as_optional to "catch" the stopped signal and map it into an empty optional as shown below: - -if (auto opt = co_await execution::stopped_as_optional(some_sender)) { - // OK, some_sender completed successfully, and opt contains the result. -} else { - // some_sender completed with a cancellation signal. -} -As described in the section "All awaitables are senders", the sender customization points recognize awaitables and adapt them transparently to model the sender concept. When connect-ing an awaitable and a receiver, the adaptation layer awaits the awaitable within a coroutine that implements unhandled_stopped in its promise type. The effect of this is that an "uncatchable" stopped exception propagates seamlessly out of awaitables, causing execution::set_stopped to be called on the receiver. - -Obviously, unhandled_stopped is a library extension of the coroutine promise interface. Many promise types will not implement unhandled_stopped. When an uncatchable stopped exception tries to propagate through such a coroutine, it is treated as an unhandled exception and terminate is called. The solution, as described above, is to use a sender adaptor to handle the stopped exception before awaiting it. It goes without saying that any future Standard Library coroutine types ought to implement unhandled_stopped. The author of Add lazy coroutine (coroutine task) type, which proposes a standard coroutine task type, is in agreement. - -4.18. Composition with parallel algorithms -The C++ Standard Library provides a large number of algorithms that offer the potential for non-sequential execution via the use of execution policies. The set of algorithms with execution policy overloads are often referred to as "parallel algorithms", although additional policies are available. - -Existing policies, such as execution::par, give the implementation permission to execute the algorithm in parallel. 
However, the choice of execution resources used to perform the work is left to the implementation. - -We will propose a customization point for combining schedulers with policies in order to provide control over where work will execute. - -template -unspecified executing_on( - execution::scheduler auto scheduler, - ExecutionPolicy && policy -); -This function would return an object of an unspecified type which can be used in place of an execution policy as the first argument to one of the parallel algorithms. The overload selected by that object should execute its computation as requested by policy while using scheduler to create any work to be run. The expression may be ill-formed if scheduler is not able to support the given policy. - -The existing parallel algorithms are synchronous; all of the effects performed by the computation are complete before the algorithm returns to its caller. This remains unchanged with the executing_on customization point. - -In the future, we expect additional papers will propose asynchronous forms of the parallel algorithms which (1) return senders rather than values or void and (2) where a customization point pairing a sender with an execution policy would similarly be used to obtain an object of unspecified type to be provided as the first argument to the algorithm. - -4.19. User-facing sender factories -A sender factory is an algorithm that takes no senders as parameters and returns a sender. - -4.19.1. execution::schedule -execution::sender auto schedule( - execution::scheduler auto scheduler -); -Returns a sender describing the start of a task graph on the provided scheduler. See § 4.2 Schedulers represent execution resources. - -execution::scheduler auto sch1 = get_system_thread_pool().scheduler(); - -execution::sender auto snd1 = execution::schedule(sch1); -// snd1 describes the creation of a new task on the system thread pool -4.19.2. 
execution::just -execution::sender auto just( - auto&&... values -); -Returns a sender with no completion schedulers, which sends the provided values. The input values are decay-copied into the returned sender. When the returned sender is connected to a receiver, the values are moved into the operation state if the sender is an rvalue; otherwise, they are copied. Then xvalues referencing the values in the operation state are passed to the receiver’s set_value. - -execution::sender auto snd1 = execution::just(3.14); -execution::sender auto then1 = execution::then(snd1, [] (double d) { - std::cout << d << "\n"; -}); - -execution::sender auto snd2 = execution::just(3.14, 42); -execution::sender auto then2 = execution::then(snd2, [] (double d, int i) { - std::cout << d << ", " << i << "\n"; -}); - -std::vector v3{1, 2, 3, 4, 5}; -execution::sender auto snd3 = execution::just(v3); -execution::sender auto then3 = execution::then(snd3, [] (std::vector&& v3copy) { - for (auto&& e : v3copy) { e *= 2; } - return std::move(v3copy); -}); -auto&& [v3copy] = this_thread::sync_wait(then3).value(); -// v3 contains {1, 2, 3, 4, 5}; v3copy will contain {2, 4, 6, 8, 10}. - -execution::sender auto snd4 = execution::just(std::vector{1, 2, 3, 4, 5}); -execution::sender auto then4 = execution::then(std::move(snd4), [] (std::vector&& v4) { - for (auto&& e : v4) { e *= 2; } - return std::move(v4); -}); -auto&& [v4] = this_thread::sync_wait(std::move(then4)).value(); -// v4 contains {2, 4, 6, 8, 10}. No vectors were copied in this example. -4.19.3. execution::just_error -execution::sender auto just_error( - auto && error -); -Returns a sender with no completion schedulers, which completes with the specified error. If the provided error is an lvalue reference, a copy is made inside the returned sender and a non-const lvalue reference to the copy is sent to the receiver’s set_error. 
If the provided value is an rvalue reference, it is moved into the returned sender and an rvalue reference to it is sent to the receiver’s set_error. - -4.19.4. execution::just_stopped -execution::sender auto just_stopped(); -Returns a sender with no completion schedulers, which completes immediately by calling the receiver’s set_stopped. - -4.19.5. execution::read_env -execution::sender auto read_env(auto tag); -Returns a sender that reaches into a receiver’s environment and pulls out the current value associated with the customization point denoted by tag. It then sends the value read back to the receiver through the value channel. For instance, read_env(get_scheduler) is a sender that asks the receiver for the currently suggested scheduler and passes it to the receiver’s set_value completion-signal. - -This can be useful when scheduling nested dependent work. The following sender pulls the current scheduler into the value channel and then schedules more work onto it. - -execution::sender auto task = - execution::read_env(get_scheduler) - | execution::let_value([](auto sched) { - return execution::starts_on(sched, some nested work here); - }); - -this_thread::sync_wait( std::move(task) ); // wait for it to finish -This code uses the fact that sync_wait associates a scheduler with the receiver that it connects with task. read_env(get_scheduler) reads that scheduler out of the receiver, and passes it to let_value's receiver’s set_value function, which in turn passes it to the lambda. That lambda returns a new sender that uses the scheduler to schedule some nested work onto sync_wait's scheduler. - -4.20. User-facing sender adaptors -A sender adaptor is an algorithm that takes one or more senders, which it may execution::connect, as parameters, and returns a sender, whose completion is related to the sender arguments it has received. 
- -Sender adaptors are lazy, that is, they are never allowed to submit any work for execution prior to the returned sender being started later on, and are also guaranteed to not start any input senders passed into them. Sender consumers such as § 4.21.1 this_thread::sync_wait start senders. - -For more implementer-centric description of starting senders, see § 5.5 Sender adaptors are lazy. - -4.20.1. execution::continues_on -execution::sender auto continues_on( - execution::sender auto input, - execution::scheduler auto scheduler -); -Returns a sender describing the transition from the execution agent of the input sender to the execution agent of the target scheduler. See § 4.6 Execution resource transitions are explicit. - -execution::scheduler auto cpu_sched = get_system_thread_pool().scheduler(); -execution::scheduler auto gpu_sched = cuda::scheduler(); - -execution::sender auto cpu_task = execution::schedule(cpu_sched); -// cpu_task describes the creation of a new task on the system thread pool - -execution::sender auto gpu_task = execution::continues_on(cpu_task, gpu_sched); -// gpu_task describes the transition of the task graph described by cpu_task to the gpu -4.20.2. execution::then -execution::sender auto then( - execution::sender auto input, - std::invocable function -); -then returns a sender describing the task graph described by the input sender, with an added node of invoking the provided function with the values sent by the input sender as arguments. - -then is guaranteed to not begin executing function until the returned sender is started. - -execution::sender auto input = get_input(); -execution::sender auto snd = execution::then(input, [](auto... args) { - std::print(args...); -}); -// snd describes the work described by pred -// followed by printing all of the values sent by pred -This adaptor is included as it is necessary for writing any sender code that actually performs a useful function. - -4.20.3. 
execution::upon_* -execution::sender auto upon_error( - execution::sender auto input, - std::invocable function -); - -execution::sender auto upon_stopped( - execution::sender auto input, - std::invocable auto function -); -upon_error and upon_stopped are similar to then, but where then works with values sent by the input sender, upon_error works with errors, and upon_stopped is invoked when the "stopped" signal is sent. - -4.20.4. execution::let_* -execution::sender auto let_value( - execution::sender auto input, - std::invocable function -); - -execution::sender auto let_error( - execution::sender auto input, - std::invocable function -); - -execution::sender auto let_stopped( - execution::sender auto input, - std::invocable auto function -); -let_value is very similar to then: when it is started, it invokes the provided function with the values sent by the input sender as arguments. However, where the sender returned from then sends exactly what that function ends up returning - let_value requires that the function return a sender, and the sender returned by let_value sends the values sent by the sender returned from the callback. This is similar to the notion of "future unwrapping" in future/promise-based frameworks. - -let_value is guaranteed to not begin executing function until the returned sender is started. - -let_error and let_stopped are similar to let_value, but where let_value works with values sent by the input sender, let_error works with errors, and let_stopped is invoked when the "stopped" signal is sent. - -4.20.5. execution::starts_on -execution::sender auto starts_on( - execution::scheduler auto sched, - execution::sender auto snd -); -Returns a sender which, when started, will start the provided sender on an execution agent belonging to the execution resource associated with the provided scheduler. This returned sender has no completion schedulers. - -4.20.6. 
execution::into_variant -execution::sender auto into_variant( - execution::sender auto snd -); -Returns a sender which sends a variant of tuples of all the possible sets of types sent by the input sender. Senders can send multiple sets of values depending on runtime conditions; this is a helper function that turns them into a single variant value. - -4.20.7. execution::stopped_as_optional -execution::sender auto stopped_as_optional( - single-sender auto snd -); -Returns a sender that maps the value channel from a T to an optional>, and maps the stopped channel to a value of an empty optional>. - -4.20.8. execution::stopped_as_error -template -execution::sender auto stopped_as_error( - execution::sender auto snd, - Error err -); -Returns a sender that maps the stopped channel to an error of err. - -4.20.9. execution::bulk -execution::sender auto bulk( - execution::sender auto input, - std::integral auto shape, - invocable function -); -Returns a sender describing the task of invoking the provided function with every index in the provided shape along with the values sent by the input sender. The returned sender completes once all invocations have completed, or an error has occurred. If it completes by sending values, they are equivalent to those sent by the input sender. - -No instance of function will begin executing until the returned sender is started. Each invocation of function runs in an execution agent whose forward progress guarantees are determined by the scheduler on which they are run. All agents created by a single use of bulk execute with the same guarantee. The number of execution agents used by bulk is not specified. This allows a scheduler to execute some invocations of the function in parallel. - -In this proposal, only integral types are used to specify the shape of the bulk section. 
We expect that future papers may wish to explore extensions of the interface to explore additional kinds of shapes, such as multi-dimensional grids, that are commonly used for parallel computing tasks. - -4.20.10. execution::split -execution::sender auto split(execution::sender auto sender); -If the provided sender is a multi-shot sender, returns that sender. Otherwise, returns a multi-shot sender which sends values equivalent to the values sent by the provided sender. See § 4.7 Senders can be either multi-shot or single-shot. - -4.20.11. execution::when_all -execution::sender auto when_all( - execution::sender auto ...inputs -); - -execution::sender auto when_all_with_variant( - execution::sender auto ...inputs -); -when_all returns a sender that completes once all of the input senders have completed. It is constrained to only accept senders that can complete with a single set of values (_i.e._, it only calls one overload of set_value on its receiver). The values sent by this sender are the values sent by each of the input senders, in order of the arguments passed to when_all. It completes inline on the execution resource on which the last input sender completes, unless stop is requested before when_all is started, in which case it completes inline within the call to start. - -when_all_with_variant does the same, but it adapts all the input senders using into_variant, and so it does not constrain the input arguments as when_all does. - -The returned sender has no completion schedulers. - -execution::scheduler auto sched = thread_pool.scheduler(); - -execution::sender auto sends_1 = ...; -execution::sender auto sends_abc = ...; - -execution::sender auto both = execution::when_all( - sends_1, - sends_abc -); - -execution::sender auto final = execution::then(both, [](auto... args){ - std::cout << std::format("the two args: {}, {}", args...); -}); -// when final executes, it will print "the two args: 1, abc" -4.21. 
User-facing sender consumers -A sender consumer is an algorithm that takes one or more senders, which it may execution::connect, as parameters, and does not return a sender. - -4.21.1. this_thread::sync_wait -auto sync_wait( - execution::sender auto sender -) requires (always-sends-same-values(sender)) - -> std::optional>; -this_thread::sync_wait is a sender consumer that submits the work described by the provided sender for execution, blocking the current std::thread or thread of main until the work is completed, and returns an optional tuple of values that were sent by the provided sender on its completion of work. Where § 4.19.1 execution::schedule and § 4.19.2 execution::just are meant to enter the domain of senders, sync_wait is one way to exit the domain of senders, retrieving the result of the task graph. - -If the provided sender sends an error instead of values, sync_wait throws that error as an exception, or rethrows the original exception if the error is of type std::exception_ptr. - -If the provided sender sends the "stopped" signal instead of values, sync_wait returns an empty optional. - -For an explanation of the requires clause, see § 5.8 All senders are typed. That clause also explains another sender consumer, built on top of sync_wait: sync_wait_with_variant. - -Note: This function is specified inside std::this_thread, and not inside execution. This is because sync_wait has to block the current execution agent, but determining what the current execution agent is is not reliable. Since the standard does not specify any functions on the current execution agent other than those in std::this_thread, this is the flavor of this function that is being proposed. If C++ ever obtains fibers, for instance, we expect that a variant of this function called std::this_fiber::sync_wait would be provided. 
We also expect that runtimes with execution agents that use different synchronization mechanisms than std::thread's will provide their own flavors of sync_wait as well (assuming their execution agents have the means to block in a non-deadlock manner). - -5. Design - implementer side -5.1. Receivers serve as glue between senders -A receiver is a callback that supports more than one channel. In fact, it supports three of them: - -set_value, which is the moral equivalent of an operator() or a function call, which signals successful completion of the operation its execution depends on; - -set_error, which signals that an error has happened during scheduling of the current work, executing the current work, or at some earlier point in the sender chain; and - -set_stopped, which signals that the operation completed without succeeding (set_value) and without failing (set_error). This result is often used to indicate that the operation stopped early, typically because it was asked to do so because the result is no longer needed. - -Once an async operation has been started exactly one of these functions must be invoked on a receiver before it is destroyed. - -While the receiver interface may look novel, it is in fact very similar to the interface of std::promise, which provides the first two signals as set_value and set_exception, and it’s possible to emulate the third channel with lifetime management of the promise. - -Receivers are not a part of the end-user-facing API of this proposal; they are necessary to allow unrelated senders communicate with each other, but the only users who will interact with receivers directly are authors of senders. - -Receivers are what is passed as the second argument to § 5.3 execution::connect. - -5.2. Operation states represent work -An operation state is an object that represents work. 
Unlike senders, it is not a chaining mechanism; instead, it is a concrete object that packages the work described by a full sender chain, ready to be executed. An operation state is neither movable nor copyable, and its interface consists of a single algorithm: start, which serves as the submission point of the work represented by a given operation state. - -Operation states are not a part of the user-facing API of this proposal; they are necessary for implementing sender consumers like this_thread::sync_wait, and the knowledge of them is necessary to implement senders, so the only users who will interact with operation states directly are authors of senders and authors of sender algorithms. - -The return value of § 5.3 execution::connect must satisfy the operation state concept. - -5.3. execution::connect -execution::connect is a customization point which connects senders with receivers, resulting in an operation state that will ensure that if start is called that one of the completion operations will be called on the receiver passed to connect. - -execution::sender auto snd = some input sender; -execution::receiver auto rcv = some receiver; -execution::operation_state auto state = execution::connect(snd, rcv); - -execution::start(state); -// at this point, it is guaranteed that the work represented by state has been submitted -// to an execution resource, and that execution resource will eventually call one of the -// completion operations on rcv - -// operation states are not movable, and therefore this operation state object must be -// kept alive until the operation finishes -5.4. Sender algorithms are customizable -Senders being able to advertise what their completion schedulers are fulfills one of the promises of senders: that of being able to customize an implementation of a sender algorithm based on what scheduler any work it depends on will complete on. 
- -The simple way to provide customizations for functions like then, that is for sender adaptors and sender consumers, is to follow the customization scheme that has been adopted for C++20 ranges library; to do that, we would define the expression execution::then(sender, invocable) to be equivalent to: - -sender.then(invocable), if that expression is well-formed; otherwise - -then(sender, invocable), performed in a context where this call always performs ADL, if that expression is well-formed; otherwise - -a default implementation of then, which returns a sender adaptor, and then define the exact semantics of said adaptor. - -However, this definition is problematic. Imagine another sender adaptor, bulk, which is a structured abstraction for a loop over an index space. Its default implementation is just a for loop. However, for accelerator runtimes like CUDA, we would like sender algorithms like bulk to have specialized behavior, which invokes a kernel of more than one thread (with its size defined by the call to bulk); therefore, we would like to customize bulk for CUDA senders to achieve this. However, there’s no reason for CUDA kernels to necessarily customize the then sender adaptor, as the generic implementation is perfectly sufficient. This creates a problem, though; consider the following snippet: - -execution::scheduler auto cuda_sch = cuda_scheduler{}; - -execution::sender auto initial = execution::schedule(cuda_sch); -// the type of initial is a type defined by the cuda_scheduler -// let’s call it cuda::schedule_sender<> - -execution::sender auto next = execution::then(initial, []{ return 1; }); -// the type of next is a standard-library unspecified sender adaptor -// that wraps the cuda sender -// let’s call it execution::then_sender_adaptor<cuda::schedule_sender<>> - -execution::sender auto kernel_sender = execution::bulk(next, shape, [](int i){ ... }); -How can we specialize the bulk sender adaptor for our wrapped schedule_sender?
Well, here’s one possible approach, taking advantage of ADL (and the fact that the definition of "associated namespace" also recursively enumerates the associated namespaces of all template parameters of a type): - -namespace cuda::for_adl_purposes { -template<typename... SentValues> -class schedule_sender { - execution::operation_state auto connect(execution::receiver auto rcv); - execution::scheduler auto get_completion_scheduler() const; -}; - -execution::sender auto bulk( - execution::sender auto && input, - execution::shape auto && shape, - invocable<sender-values(input)> auto && fn) -{ - // return a cuda sender representing a bulk kernel launch -} -} // namespace cuda::for_adl_purposes -However, if the input sender is not just a then_sender_adaptor like in the example above, but another sender that overrides bulk by itself, as a member function, because its author believes they know an optimization for bulk - the specialization above will no longer be selected, because a member function of the first argument is a better match than the ADL-found overload. - -This means that well-meant specialization of sender algorithms that are entirely scheduler-agnostic can have negative consequences. The scheduler-specific specialization - which is essential for good performance on platforms providing specialized ways to launch certain sender algorithms - would not be selected in such cases. But it’s really the scheduler that should control the behavior of sender algorithms when a non-default implementation exists, not the sender. Senders merely describe work; schedulers, however, are the handle to the runtime that will eventually execute said work, and should thus have the final say in how the work is going to be executed.
- -Therefore, we are proposing the following customization scheme: the expression execution::(sender, args...), for any given sender algorithm that accepts a sender as its first argument, should do the following: - -Create a sender that implements the default implementation of the sender algorithm. That sender is tuple-like; it can be destructured into its constituent parts: algorithm tag, data, and child sender(s). - -We query the child sender for its domain. A domain is a tag type associated with the scheduler that the child sender will complete on. If there are multiple child senders, we query all of them for their domains and require that they all be the same. - -We use the domain to dispatch to a transform_sender customization, which accepts the sender and optionally performs a domain-specific transformation on it. This customization is expected to return a new sender, which will be returned from in place of the original sender. - -5.5. Sender adaptors are lazy -Contrary to early revisions of this paper, we propose to make all sender adaptors perform strictly lazy submission, unless specified otherwise. - -Strictly lazy submission means that there is a guarantee that no work is submitted to an execution resource before a receiver is connected to a sender, and execution::start is called on the resulting operation state. - -5.6. Lazy senders provide optimization opportunities -Because lazy senders fundamentally describe work, instead of describing or representing the submission of said work to an execution resource, and thanks to the flexibility of the customization of most sender algorithms, they provide an opportunity for fusing multiple algorithms in a sender chain together, into a single function that can later be submitted for execution by an execution resource. There are two ways this can happen. 
- -The first (and most common) way for such optimizations to happen is thanks to the structure of the implementation: because all the work is done within callbacks invoked on the completion of an earlier sender, recursively up to the original source of computation, the compiler is able to see a chain of work described using senders as a tree of tail calls, allowing for inlining and removal of most of the sender machinery. In fact, when work is not submitted to execution resources outside of the current thread of execution, compilers are capable of removing the senders abstraction entirely, while still allowing for composition of functions across different parts of a program. - -The second way for this to occur is when a sender algorithm is specialized for a specific set of arguments. For instance, an implementation could recognize two subsequent § 4.20.9 execution::bulks of compatible shapes, and merge them together into a single submission of a GPU kernel. - -5.7. Execution resource transitions are two-step -Because execution::continues_on takes a sender as its first argument, it is not actually directly customizable by the target scheduler. This is by design: the target scheduler may not know how to transition from a scheduler such as a CUDA scheduler; transitioning away from a GPU in an efficient manner requires making runtime calls that are specific to the GPU in question, and the same is usually true for other kinds of accelerators too (or for scheduler running on remote systems). To avoid this problem, specialized schedulers like the ones mentioned here can still hook into the transition mechanism, and inject a sender which will perform a transition to the regular CPU execution resource, so that any sender can be attached to it. 
- -This, however, is a problem: because customization of sender algorithms must be controlled by the scheduler they will run on (see § 5.4 Sender algorithms are customizable), the type of the sender returned from continues_on must be controllable by the target scheduler. Besides, the target scheduler may itself represent a specialized execution resource, which requires additional work to be performed to transition to it. GPUs and remote node schedulers are once again good examples of such schedulers: executing code on their execution resources requires making runtime API calls for work submission, and quite possibly for the data movement of the values being sent by the input sender passed into continues_on. - -To allow for such customization from both ends, we propose the inclusion of a secondary transitioning sender adaptor, called schedule_from. This adaptor is a form of schedule, but takes an additional, second argument: the input sender. This adaptor is not meant to be invoked manually by the end users; they are always supposed to invoke continues_on, to ensure that both schedulers have a say in how the transitions are made. Any scheduler that specializes continues_on(snd, sch) shall ensure that the return value of their customization is equivalent to schedule_from(sch, snd2), where snd2 is a successor of snd that sends values equivalent to those sent by snd. - -The default implementation of continues_on(snd, sched) is schedule_from(sched, snd). - -5.8. All senders are typed -All senders must advertise the types they will send when they complete. There are many sender adaptors that need this information. Even just transitioning from one execution context to another requires temporarily storing the async result data so it can be propagated in the new execution context. Doing that efficiently requires knowing the type of the data. 
- -The mechanism a sender uses to advertise its completions is the get_completion_signatures customization point, which takes an environment and must return a specialization of the execution::completion_signatures class template. The template parameters of execution::completion_signatures are a list of function types that represent the completion operations of the sender. For example, the type execution::set_value_t(size_t, const char*) indicates that the sender can complete successfully by passing a size_t and a const char* to the receiver’s set_value function. - -This proposal includes utilities for parsing and manipulating the list of a sender’s completion signatures. For instance, value_types_of_t is a template alias for accessing a sender’s value completions. It takes a sender, an environment, and two variadic template template parameters: a tuple-like template and a variant-like template. You can get the value completions of S and Env with value_types_of_t<S, Env, Tuple, Variant>. For example, for a sender that can complete successfully with either Ts... or Us..., value_types_of_t<S, Env, std::tuple, std::variant> would name the type std::variant<std::tuple<Ts...>, std::tuple<Us...>>. - -5.9. Customization points -Earlier versions of this paper used a dispatching technique known as tag_invoke (see tag_invoke: A general pattern for supporting customisable functions) to allow for customization of basis operations and sender algorithms. This technique used private friend functions named "tag_invoke" that are found by argument-dependent look-up. The tag_invoke overloads are distinguished from each other by their first argument, which is the type of the customization point object being customized. For instance, to customize the execution::set_value operation, a receiver type might do the following: - -struct my_receiver { - friend void tag_invoke(execution::set_value_t, my_receiver&& self, int value) noexcept { - std::cout << "received value: " << value; - } - //...
-}; -The tag_invoke technique, although it had its strengths, has been replaced with a new (or rather, a very old) technique that uses explicit concept opt-ins and named member functions. For instance, the execution::set_value operation is now customized by defining a member function named set_value in the receiver type. This technique is more explicit and easier to understand than tag_invoke. This is what a receiver author would do to customize execution::set_value now: - -struct my_receiver { - using receiver_concept = execution::receiver_t; - - void set_value(int value) && noexcept { - std::cout << "received value: " << value; - } - //... -}; -The only exception to this is the customization of queries. There is a need to build queryable adaptors that can forward an open and unknowable set of queries to some wrapped object. This is done by defining a member function named query in the adaptor type that takes the query CPO object as its first (and usually only) argument. A queryable adaptor might look like this: - -template -concept query_for = - requires (const Queryable& o, Args&&... args) { - o.query(Query(), (Args&&) args...); - }; - -template, - class Base = execution::empty_env> -struct with_allocator { - Allocator alloc{}; - Base base{}; - - // Forward unknown queries to the wrapped object: - template Query> - decltype(auto) query(Query q) const { - return base.query(q); - } - - // Specialize the query for the allocator: - Allocator query(execution::get_allocator_t) const { - return alloc; - } -}; -Customization of sender algorithms such as execution::then and execution::bulk are handled differently because they must dispatch based on where the sender is executing. See the section on § 5.4 Sender algorithms are customizable for more information. - -6. Specification -Much of this wording follows the wording of A Unified Executors Proposal for C++. 
- -§ 22 General utilities library [utilities] is meant to be a diff relative to the wording of the [utilities] clause of Working Draft, Standard for Programming Language C++. - -§ 33 Concurrency support library [thread] is meant to be a diff relative to the wording of the [thread] clause of Working Draft, Standard for Programming Language C++. This diff applies changes from Composable cancellation for sender-based async operations. - -§ 34 Execution control library [exec] is meant to be added as a new library clause to the working draft of C++. - -14. Exception handling [except] -14.6. Special functions [except.special] -14.6.2. The std::terminate function [except.terminate] -At the end of the bulleted list in the Note in paragraph 1, add a new bullet as follows: - -when a call to a wait(), wait_until(), or wait_for() function on a condition variable (33.7.4, 33.7.5) fails to meet a postcondition. - -when a callback invocation exits via an exception when requesting stop on a std::stop_source or a std::inplace_stop_source ([stopsource.mem], [stopsource.inplace.mem]), or in the constructor of std::stop_callback or std::inplace_stop_callback ([stopcallback.cons], [stopcallback.inplace.cons]) when a callback invocation exits via an exception. - -when a run_loop object is destroyed that is still in the running state ([exec.run.loop]). - -when unhandled_stopped() is called on a with_awaitable_senders object ([exec.with.awaitable.senders]) whose continuation is not a handle to a coroutine whose promise type has an unhandled_stopped() member function. - -16. Library introduction [library] -At the end of [expos.only.entity], add the following: - -The following are defined for exposition only to aid in the specification of the library: - -namespace std { - // ...as before... -} -An object dst is said to be decay-copied from a subexpression src if the type of dst is decay_t, and dst is copy-initialized from src. - -16.4.4.6.1. 
General [allocator.requirements.general] -At the end of [allocator.requirements.general], add the following new paragraph: - -[Example 2: The following is an allocator class template supporting the minimal interface that meets the requirements of [allocator.requirements.general]: - -template<class T> -struct SimpleAllocator { - using value_type = T; - SimpleAllocator(ctor args); - - template<class U> SimpleAllocator(const SimpleAllocator<U>& other); - - T* allocate(std::size_t n); - void deallocate(T* p, std::size_t n); - - template<class U> bool operator==(const SimpleAllocator<U>& rhs) const; -}; --- end example] - -The following exposition-only concept defines the minimal requirements on an Allocator type. - -template<class Alloc> -concept simple-allocator = - requires(Alloc alloc, size_t n) { - { *alloc.allocate(n) } -> same_as<typename Alloc::value_type&>; - { alloc.deallocate(alloc.allocate(n), n) }; - } && - copy_constructible<Alloc> && - equality_comparable<Alloc>; -A type Alloc models simple-allocator if it meets the requirements of [allocator.requirements.general]. - -17. Language support library [cpp] -17.3. Implementation properties [support.limits] -17.3.2. Header <version> synopsis [version.syn] -To the synopsis, add the following: - -#define __cpp_lib_semaphore 201907L // also in <semaphore> -#define __cpp_lib_senders 2024XXL // also in <execution> -#define __cpp_lib_shared_mutex 201505L // also in <shared_mutex> -22. General utilities library [utilities] -22.10. Function objects [function.objects] -22.10.2. Header <functional> synopsis [functional.syn] -At the end of this subclause, insert the following declarations into the synopsis within namespace std: - -namespace std { - // ...as before... - - namespace ranges { - // 22.10.9, concept-constrained comparisons - struct equal_to; // freestanding - struct not_equal_to; // freestanding - struct greater; // freestanding - struct less; // freestanding - struct greater_equal; // freestanding - struct less_equal; // freestanding - } - - template<class Fn, class... Args> - concept callable = // exposition only - requires (Fn&& fn, Args&&... 
args) { - std::forward<Fn>(fn)(std::forward<Args>(args)...); - }; - template<class Fn, class... Args> - concept nothrow-callable = // exposition only - callable<Fn, Args...> && - requires (Fn&& fn, Args&&... args) { - { std::forward<Fn>(fn)(std::forward<Args>(args)...) } noexcept; - }; - // exposition only: - template<class Fn, class... Args> - using call-result-t = decltype(declval<Fn>()(declval<Args>()...)); - - template<const auto& Tag> - using decayed-typeof = decltype(auto(Tag)); // exposition only - -} -33. Concurrency support library [thread] -33.3. Stop tokens [thread.stoptoken] -33.3.1. Introduction [thread.stoptoken.intro] -Subclause [thread.stoptoken] describes components that can be used to asynchronously request that an operation stops execution in a timely manner, typically because the result is no longer required. Such a request is called a stop request. - -stop_source, stop_token, and stop_callback implement stoppable-source, stoppable_token, and stoppable-callback-for are concepts that specify the required syntax and semantics of shared ownership of access to a stop state. Any stop_source, stop_token, or stop_callback object that shares ownership of the same stop state is an associated stop_source, stop_token, or stop_callback, respectively. Any object modeling stoppable-source, stoppable_token, or stoppable-callback-for that refers to the same stop state is an associated stoppable-source, stoppable_token, or stoppable-callback-for, respectively. The last remaining owner of the stop state automatically releases the resources associated with the stop state. -An object of a type that models stoppable_token can be passed to an operation which that can either - -actively poll the token to check if there has been a stop request, or - -register a callback using the stop_callback class template which that will be called in the event that a stop request is made. - -A stop request made via a stop_source an object whose type models stoppable-source will be visible to all associated stoppable_token and stop_source stoppable-source objects. 
Once a stop request has been made it cannot be withdrawn (a subsequent stop request has no effect). - -Callbacks registered via a stop_callback object an object whose type models stoppable-callback-for are called when a stop request is first made by any associated stop_source stoppable-source object. - -The following paragraph is moved to the specification of the new stoppable-source concept. - -Calls to the functions request_stop, stop_requested, and stop_possible do not introduce data races. A call to request_stop that returns true synchronizes with a call to stop_requested on an associated stop_token or stop_source object that returns true. Registration of a callback synchronizes with the invocation of that callback. - -The types stop_source and stop_token and the class template stop_callback implement the semantics of shared ownership of a stop state. The last remaining owner of the stop state automatically releases the resources associated with the stop state. - -An object of type inplace_stop_source is the sole owner of its stop state. An object of type inplace_stop_token or of a specialization of the class template inplace_stop_callback does not participate in ownership of its associated stop state. They are for use when all uses of the associated token and callback objects are known to nest within the lifetime of the inplace_stop_source object. - -33.3.2. 
Header synopsis [thread.stoptoken.syn] -In this subclause, insert the following declarations into the synopsis: - -namespace std { - // [stoptoken.concepts], stop token concepts - template - concept stoppable-callback-for = see below; // exposition only - - template - concept stoppable_token = see below; - - template - concept unstoppable_token = see below; - - template - concept stoppable-source = see below; // exposition only - - // 33.3.3, class stop_token - class stop_token; - - // 33.3.4, class stop_source - class stop_source; - - // no-shared-stop-state indicator - struct nostopstate_t { - explicit nostopstate_t() = default; - }; - inline constexpr nostopstate_t nostopstate{}; - - // 33.3.5, class template stop_callback - template - class stop_callback; - - // [stoptoken.never], class never_stop_token - class never_stop_token; - - // [stoptoken.inplace], class inplace_stop_token - class inplace_stop_token; - - // [stopsource.inplace], class inplace_stop_source - class inplace_stop_source; - - // [stopcallback.inplace], class template inplace_stop_callback - template - class inplace_stop_callback; - - template - using stop_callback_for_t = T::template callback_type; - -} -Insert the following subclause as a new subclause between Header synopsis [thread.stoptoken.syn] and Class stop_token [stoptoken]. - -33.3.3. Stop token concepts [stoptoken.concepts] -The exposition-only stoppable-callback-for concept checks for a callback compatible with a given Token type. - -template - concept stoppable-callback-for = // exposition only - invocable && - constructible_from && - requires { typename stop_callback_for_t; } && - constructible_from, const Token&, Initializer>; -Let t and u be distinct, valid objects of type Token that reference the same logical stop state; let init be an expression such that same_as is true; and let SCB denote the type stop_callback_for_t. 
- -The concept stoppable-callback-for is modeled only if: - -The following concepts are modeled: - -constructible_from - -constructible_from - -constructible_from - -An object of type SCB has an associated callback function of type CallbackFn. Let scb be an object of type SCB and let callback_fn denote scb's associated callback function. Direct-non-list-initializing scb from arguments t and init shall execute a stoppable callback registration as follows: - -If t.stop_possible() is true: - -callback_fn shall be direct-initialized with init. - -Construction of scb shall only throw exceptions thrown by the initialization of callback_fn from init. - -The callback invocation std::forward(callback_fn)() shall be registered with t's associated stop state as follows: - -If t.stop_requested() evaluates to false at the time of registration, the callback invocation is added to the stop state’s list of callbacks such that std::forward(callback_fn)() is evaluated if a stop request is made on the stop state. - -Otherwise, std::forward(callback_fn)() shall be immediately evaluated on the thread executing scb's constructor, and the callback invocation shall not be added to the list of callback invocations. - -If the callback invocation was added to stop state’s list of callbacks, scb shall be associated with the stop state. - -If t.stop_possible() is false, there is no requirement that the initialization of scb causes the initialization of callback_fn. - -Destruction of scb shall execute a stoppable callback deregistration as follows (in order): - -If the constructor of scb did not register a callback invocation with t's stop state, then the stoppable callback deregistration shall have no effect other than destroying callback_fn if it was constructed. - -Otherwise, the invocation of callback_fn shall be removed from the associated stop state. 
- -If callback_fn is concurrently executing on another thread then the stoppable callback deregistration shall block ([defns.block]) until the invocation of callback_fn returns such that the return from the invocation of callback_fn strongly happens before ([intro.races]) the destruction of callback_fn. - -If callback_fn is executing on the current thread, then the destructor shall not block waiting for the return from the invocation of callback_fn. - -A stoppable callback deregistration shall not block on the completion of the invocation of some other callback registered with the same logical stop state. - -The stoppable callback deregistration shall destroy callback_fn. - -The stoppable_token concept checks for the basic interface of a stop token that is copyable and allows polling to see if stop has been requested and also whether a stop request is possible. The unstoppable_token concept checks for a stoppable_token type that does not allow stopping. - -template<template<class> class> - struct check-type-alias-exists; // exposition-only - -template<class Token> - concept stoppable_token = - requires (const Token tok) { - typename check-type-alias-exists<Token::template callback_type>; - { tok.stop_requested() } noexcept -> same_as<bool>; - { tok.stop_possible() } noexcept -> same_as<bool>; - { Token(tok) } noexcept; // see implicit expression variations - // ([concepts.equality]) - } && - copyable<Token> && - equality_comparable<Token> && - swappable<Token>; - -template<class Token> - concept unstoppable_token = - stoppable_token<Token> && - requires (const Token tok) { - requires bool_constant<(!tok.stop_possible())>::value; - }; -An object whose type models stoppable_token has at most one associated logical stop state. A stoppable_token object with no associated stop state is said to be disengaged. - -Let SP be an evaluation of t.stop_possible() that is false, and let SR be an evaluation of t.stop_requested() that is true. 
- -The type Token models stoppable_token only if: - -Any evaluation of u.stop_possible() or u.stop_requested() that happens after ([intro.races]) SP is false. - -Any evaluation of u.stop_possible() or u.stop_requested() that happens after SR is true. - -For any types CallbackFn and Initializer such that stoppable-callback-for is satisfied, stoppable-callback-for is modeled. - -If t is disengaged, evaluations of t.stop_possible() and t.stop_requested() are false. - -If t and u reference the same stop state, or if both t and u are disengaged, t == u is true; otherwise, it is false. - -An object whose type models the exposition-only stoppable-source concept can be queried whether stop has been requested (stop_requested) and whether stop is possible (stop_possible). It is a factory for associated stop tokens (get_token), and a stop request can be made on it (request_stop). It maintains a list of registered stop callback invocations that it executes when a stop request is first made. - -template - concept stoppable-source = // exposition only - requires (Source& src, const Source csrc) { // see implicit expression variations - // ([concepts.equality]) - { csrc.get_token() } -> stoppable_token; - { csrc.stop_possible() } noexcept -> same_as; - { csrc.stop_requested() } noexcept -> same_as; - { src.request_stop() } -> same_as; - }; -An object whose type models stoppable-source has at most one associated logical stop state. If it has no associated stop state, it is said to be disengaged. Let s be an object whose type models stoppable-source and that is disengaged. s.stop_possible() and s.stop_requested() shall return false. - -Let t be an object whose type models stoppable-source. If t is disengaged, t.get_token() shall return a disengaged stop token; otherwise, it shall return a stop token that is associated with the stop state of t. - -The following paragraph is moved from the introduction, with minor modifications (underlined in green). 
- -Calls to the member functions request_stop, stop_requested, and stop_possible and similarly named member functions on associated stoppable_token objects do not introduce data races. A call to request_stop that returns true synchronizes with a call to stop_requested on an associated stoppable_token or stop_source stoppable-source object that returns true. Registration of a callback synchronizes with the invocation of that callback. - -The following paragraph is taken from § 33.3.5.3 Member functions [stopsource.mem] and modified. - -If the stoppable-source is disengaged, request_stop shall have no effect and return false. Otherwise, it shall execute a stop request operation on the associated stop state. A stop request operation determines whether the stop state has received a stop request, and if not, makes a stop request. The determination and making of the stop request shall happen atomically, as-if by a read-modify-write operation ([intro.races]). If the request was made, the stop state’s registered callback invocations shall be synchronously executed. If an invocation of a callback exits via an exception then terminate shall be invoked ([except.terminate]). No constraint is placed on the order in which the callback invocations are executed. request_stop shall return true if a stop request was made, and false otherwise. After a call to request_stop either a call to stop_possible shall return false or a call to stop_requested shall return true. - -A stop request includes notifying all condition variables of type condition_variable_any temporarily registered during an interruptible wait ([thread.condvarany.intwait]). - -Modify subclause [stoptoken] as follows: - -33.3.4. Class stop_token [stoptoken] -33.3.4.1. General [stoptoken.general] -The class stop_token provides an interface for querying whether a stop request has been made (stop_requested) or can ever be made (stop_possible) using an associated stop_source object ([stopsource]). 
A stop_token can also be passed to a stop_callback ([stopcallback]) constructor to register a callback to be called when a stop request has been made from an associated stop_source. The class stop_token models the concept stoppable_token. It shares ownership of its stop state, if any, with its associated stop_source object ([stopsource]) and any stop_token objects to which it compares equal. -namespace std { - class stop_token { - public: - template - using callback_type = stop_callback; - - // [stoptoken.cons], constructors, copy, and assignment - stop_token() noexcept = default; - - stop_token(const stop_token&) noexcept; - stop_token(stop_token&&) noexcept; - stop_token& operator=(const stop_token&) noexcept; - stop_token& operator=(stop_token&&) noexcept; - ~stop_token(); - - - // [stoptoken.mem], Member functions - void swap(stop_token&) noexcept; - - // [stoptoken.mem], stop handling - [[nodiscard]] bool stop_requested() const noexcept; - [[nodiscard]] bool stop_possible() const noexcept; - - bool operator==(const stop_token& rhs) const noexcept = default; - [[nodiscard]] friend bool operator==(const stop_token& lhs, const stop_token& rhs) noexcept; - friend void swap(stop_token& lhs, stop_token& rhs) noexcept; - private: - shared_ptr stop-state{}; // exposition only - }; -} -stop-state refers to the stop_token's associated stop state. A stop_token object is disengaged when stop-state is empty. - -33.3.4.2. Constructors, copy, and assignment [stoptoken.cons] -stop_token() noexcept; -Postconditions: stop_possible() is false and stop_requested() is false. Because the created stop_token object can never receive a stop request, no resources are allocated for a stop state. - -stop_token(const stop_token& rhs) noexcept; -Postconditions: *this == rhs is true. *this and rhs share the ownership of the same stop state, if any. 
- -stop_token(stop_token&& rhs) noexcept; -Postconditions: *this contains the value of rhs prior to the start of construction and rhs.stop_possible() is false. - -~stop_token(); -Effects: Releases ownership of the stop state, if any. - -stop_token& operator=(const stop_token& rhs) noexcept; -Effects: Equivalent to: stop_token(rhs).swap(*this). - -Returns: *this. - -stop_token& operator=(stop_token&& rhs) noexcept; -Effects: Equivalent to: stop_token(std::move(rhs)).swap(*this). - -Returns: *this. - -Move swap into [stoptoken.mem]: - -33.3.4.3. Member functions [stoptoken.mem] -void swap(stop_token& rhs) noexcept; -Effects: Exchanges the values of *this and rhs. Equivalent to: stop-state.swap(rhs.stop-state). - -[[nodiscard]] bool stop_requested() const noexcept; -Returns: true if *this has ownership of stop-state refers to a stop state that has received a stop request; otherwise, false. - -[[nodiscard]] bool stop_possible() const noexcept; -Returns: false if: - -*this does not have ownership of a stop state is disengaged , or - -a stop request was not made and there are no associated stop_source objects; otherwise, true. - -The following are covered by the equality_comparable and swappable concepts. - -33.3.4.4. Non-member functions [stoptoken.nonmembers] -[[nodiscard]] bool operator==(const stop_token& lhs, const stop_token& rhs) noexcept; -Returns: true if lhs and rhs have ownership of the same stop state or if both lhs and rhs do not have ownership of a stop state; otherwise false. - -friend void swap(stop_token& x, stop_token& y) noexcept; -Effects: Equivalent to: x.swap(y). - -33.3.5. Class stop_source [stopsource] -33.3.5.1. General [stopsource.general] -The class stop_source implements the semantics of making a stop request. A stop request made on a stop_source object is visible to all associated stop_source and stop_token ([thread.stoptoken]) objects. Once a stop request has been made it cannot be withdrawn (a subsequent stop request has no effect). 
-namespace std { - The following definitions are already specified in the synopsis: - // no-shared-stop-state indicator - struct nostopstate_t { - explicit nostopstate_t() = default; - }; - inline constexpr nostopstate_t nostopstate{}; - - - class stop_source { - public: - // 33.3.4.2, constructors, copy, and assignment - stop_source(); - explicit stop_source(nostopstate_t) noexcept; {} - - stop_source(const stop_source&) noexcept; - stop_source(stop_source&&) noexcept; - stop_source& operator=(const stop_source&) noexcept; - stop_source& operator=(stop_source&&) noexcept; - ~stop_source(); - - - // [stopsource.mem], Member functions - void swap(stop_source&) noexcept; - - // 33.3.4.3, stop handling - [[nodiscard]] stop_token get_token() const noexcept; - [[nodiscard]] bool stop_possible() const noexcept; - [[nodiscard]] bool stop_requested() const noexcept; - bool request_stop() noexcept; - - bool operator==(const stop_source& rhs) const noexcept = default; - [[nodiscard]] friend bool - operator==(const stop_source& lhs, const stop_source& rhs) noexcept; - friend void swap(stop_source& lhs, stop_source& rhs) noexcept; - - private: - shared_ptr stop-state{}; // exposition only - }; -} -stop-state refers to the stop_source's associated stop state. A stop_source object is disengaged when stop-state is empty. - -stop_source models stoppable-source, copyable, equality_comparable, and swappable. - -33.3.5.2. Constructors, copy, and assignment [stopsource.cons] -stop_source(); -Effects: Initialises *this to have ownership of stop-state with a pointer to a new stop state. - -Postconditions: stop_possible() is true and stop_requested() is false. - -Throws: bad_alloc if memory cannot be allocated for the stop state. - -explicit stop_source(nostopstate_t) noexcept; -Postconditions: stop_possible() is false and stop_requested() is false. No resources are allocated for the state. - -stop_source(const stop_source& rhs) noexcept; -Postconditions: *this == rhs is true. 
*this and rhs share the ownership of the same stop state, if any. - -stop_source(stop_source&& rhs) noexcept; -Postconditions: *this contains the value of rhs prior to the start of construction and rhs.stop_possible() is false. - -~stop_source(); -Effects: Releases ownership of the stop state, if any. - -stop_source& operator=(const stop_source& rhs) noexcept; -Effects: Equivalent to: stop_source(rhs).swap(*this). - -Returns: *this. - -stop_source& operator=(stop_source&& rhs) noexcept; -Effects: Equivalent to: stop_source(std::move(rhs)).swap(*this). - -Returns: *this. - -Move swap into [stopsource.mem]: - -33.3.5.3. Member functions [stopsource.mem] -void swap(stop_source& rhs) noexcept; -Effects: Exchanges the values of *this and rhs Equivalent to: stop-state.swap(rhs.stop-state) . - -[[nodiscard]] stop_token get_token() const noexcept; -Returns: stop_token() if stop_possible() is false; otherwise a new associated stop_token object ; i.e., its stop-state member is equal to the stop-state member of *this . - -[[nodiscard]] bool stop_possible() const noexcept; -Returns: true if *this has ownership of a stop state; otherwise, false stop-state != nullptr . - -[[nodiscard]] bool stop_requested() const noexcept; -Returns: true if *this has ownership of stop-state refers to a stop state that has received a stop request; otherwise, false. - -bool request_stop() noexcept; -Effects: Executes a stop request operation ([stoptoken.concepts]) on the associated stop state, if any. - -Effects: If *this does not have ownership of a stop state, returns false. Otherwise, atomically determines whether the owned stop state has received a stop request, and if not, makes a stop request. The determination and making of the stop request are an atomic read-modify-write operation ([intro.races]). If the request was made, the callbacks registered by associated stop_callback objects are synchronously called. 
If an invocation of a callback exits via an exception then terminate is invoked ([except.terminate]). - -A stop request includes notifying all condition variables of type condition_variable_any temporarily registered during an interruptible wait ([thread.condvarany.intwait]). - -Postconditions: stop_possible() is false or stop_requested() is true. - -Returns: true if this call made a stop request; otherwise false. - -33.3.5.4. Non-member functions [stopsource.nonmembers] -[[nodiscard]] friend bool - operator==(const stop_source& lhs, const stop_source& rhs) noexcept; -Returns: true if lhs and rhs have ownership of the same stop state or if both lhs and rhs do not have ownership of a stop state; otherwise false. - -friend void swap(stop_source& x, stop_source& y) noexcept; -Effects: Equivalent to: x.swap(y). - -33.3.6. Class template stop_callback [stopcallback] -33.3.6.1. General [stopcallback.general] -namespace std { - template - class stop_callback { - public: - using callback_type = CallbackFn; - - // 33.3.5.2, constructors and destructor - template - explicit stop_callback(const stop_token& st, CInitializer&& cbinit) - noexcept(is_nothrow_constructible_v); - template - explicit stop_callback(stop_token&& st, CInitializer&& cbinit) - noexcept(is_nothrow_constructible_v); - ~stop_callback(); - - stop_callback(const stop_callback&) = delete; - stop_callback(stop_callback&&) = delete; - stop_callback& operator=(const stop_callback&) = delete; - stop_callback& operator=(stop_callback&&) = delete; - - private: - CallbackFn callbackcallback-fn; // exposition only - }; - - template - stop_callback(stop_token, CallbackFn) -> stop_callback; -} -Mandates: stop_callback is instantiated with an argument for the template parameter CallbackFn that satisfies both invocable and destructible. - -Preconditions: stop_callback is instantiated with an argument for the template parameter Callback that models both invocable and destructible. 
- -Remarks: For a type Initializer, if stoppable-callback-for is satisfied, then stoppable-callback-for is modeled. The exposition-only callback-fn member is the associated callback function ([stoptoken.concepts]) of stop_callback objects. - -33.3.6.2. Constructors and destructor [stopcallback.cons] -template -explicit stop_callback(const stop_token& st, CInitializer&& cbinit) - noexcept(is_nothrow_constructible_v); -template -explicit stop_callback(stop_token&& st, CInitializer&& cbinit) - noexcept(is_nothrow_constructible_v); -Constraints: CallbackFn and CInitializer satisfy constructible_from. - -Preconditions: Callback and C model constructible_from. - -Effects: Initializes callbackcallback-fn with std::forward(cbinit) and executes a stoppable callback registration ([stoptoken.concepts]) . If st.stop_requested() is true, then std::forward<Callback>(callback)() is evaluated in the current thread before the constructor returns. Otherwise, if st has ownership of a stop state, acquires shared ownership of that stop state and registers the callback with that stop state such that std::forward<Callback>(callback)() is evaluated by the first call to request_stop() on an associated stop_source. If a callback is registered with st's shared stop state, then *this acquires shared ownership of that stop state. - -Throws: Any exception thrown by the initialization of callback. - -Remarks: If evaluating std::forward(callback)() exits via an exception, then terminate is invoked ([except.terminate]). - -~stop_callback(); -Effects: Unregisters the callback from the owned stop state, if any. The destructor does not block waiting for the execution of another callback registered by an associated stop_callback. If callback is concurrently executing on another thread, then the return from the invocation of callback strongly happens before ([intro.races]) callback is destroyed. 
If callback is executing on the current thread, then the destructor does not block ([defns.block]) waiting for the return from the invocation of callback. Releases Executes a stoppable callback deregistration ([stoptoken.concepts]) and releases ownership of the stop state, if any. - -Insert a new subclause, Class never_stop_token [stoptoken.never], after subclause Class template stop_callback [stopcallback], as a new subclause of Stop tokens [thread.stoptoken]. - -33.3.7. Class never_stop_token [stoptoken.never] -33.3.7.1. General [stoptoken.never.general] -The class never_stop_token models the unstoppable_token concept. It provides a stop token interface, but also provides static information that a stop is never possible nor requested. - -namespace std { - class never_stop_token { - struct callback-type { // exposition only - explicit callback-type(never_stop_token, auto&&) noexcept {} - }; - public: - template - using callback_type = callback-type; - - static constexpr bool stop_requested() noexcept { return false; } - static constexpr bool stop_possible() noexcept { return false; } - - bool operator==(const never_stop_token&) const = default; - }; -} -Insert a new subclause, Class inplace_stop_token [stoptoken.inplace], after the subclause added above, as a new subclause of Stop tokens [thread.stoptoken]. - -33.3.8. Class inplace_stop_token [stoptoken.inplace] -33.3.8.1. General [stoptoken.inplace.general] -The class inplace_stop_token models the concept stoppable_token. It references the stop state of its associated inplace_stop_source object ([stopsource.inplace]), if any. 
- -namespace std { - class inplace_stop_token { - public: - template - using callback_type = inplace_stop_callback; - - inplace_stop_token() = default; - bool operator==(const inplace_stop_token&) const = default; - - // [stoptoken.inplace.mem], member functions - bool stop_requested() const noexcept; - bool stop_possible() const noexcept; - void swap(inplace_stop_token&) noexcept; - - private: - const inplace_stop_source* stop-source = nullptr; // exposition only - }; -} -33.3.8.2. Member functions [stoptoken.inplace.members] -void swap(inplace_stop_token& rhs) noexcept; -Effects: Exchanges the values of stop-source and rhs.stop-source. - -bool stop_requested() const noexcept; -Effects: Equivalent to: return stop-source != nullptr && stop-source->stop_requested(); - -As specified in [basic.life], the behavior of stop_requested() is undefined unless the call strongly happens before the start of the destructor of the associated inplace_stop_source, if any. - -bool stop_possible() const noexcept; -Returns: stop-source != nullptr. - -As specified in [basic.stc.general], the behavior of stop_possible() is implementation-defined unless the call strongly happens before the end of the storage duration of the associated inplace_stop_source object, if any. - -Insert a new subclause, Class inplace_stop_source [stopsource.inplace], after the subclause added above, as a new subclause of Stop tokens [thread.stoptoken]. - -33.3.9. Class inplace_stop_source [stopsource.inplace] -33.3.9.1. General [stopsource.inplace.general] -The class inplace_stop_source models stoppable-source. 
- -namespace std { - class inplace_stop_source { - public: - // [stopsource.inplace.cons], constructors, copy, and assignment - constexpr inplace_stop_source() noexcept; - - inplace_stop_source(inplace_stop_source&&) = delete; - inplace_stop_source(const inplace_stop_source&) = delete; - inplace_stop_source& operator=(inplace_stop_source&&) = delete; - inplace_stop_source& operator=(const inplace_stop_source&) = delete; - ~inplace_stop_source(); - - //[stopsource.inplace.mem], stop handling - constexpr inplace_stop_token get_token() const noexcept; - static constexpr bool stop_possible() noexcept { return true; } - bool stop_requested() const noexcept; - bool request_stop() noexcept; - }; -} -33.3.9.2. Constructors, copy, and assignment [stopsource.inplace.cons] -constexpr inplace_stop_source() noexcept; -Effects: Initializes a new stop state inside *this. - -Postconditions: stop_requested() is false. - -33.3.9.3. Members [stopsource.inplace.mem] -constexpr inplace_stop_token get_token() const noexcept; -Returns: A new associated inplace_stop_token object. The inplace_stop_token object’s stop-source member is equal to this. - -bool stop_requested() const noexcept; -Returns: true if the stop state inside *this has received a stop request; otherwise, false. - -bool request_stop() noexcept; -Effects: Executes a stop request operation ([stoptoken.concepts]). - -Postconditions: stop_requested() is true. - -Insert a new subclause, Class template inplace_stop_callback [stopcallback.inplace], after the subclause added above, as a new subclause of Stop tokens [thread.stoptoken]. - -33.3.10. Class template inplace_stop_callback [stopcallback.inplace] -33.3.10.1. 
General [stopcallback.inplace.general] -namespace std { - template - class inplace_stop_callback { - public: - using callback_type = CallbackFn; - - // [stopcallback.inplace.cons], constructors and destructor - template - explicit inplace_stop_callback(inplace_stop_token st, Initializer&& init) - noexcept(is_nothrow_constructible_v); - ~inplace_stop_callback(); - - inplace_stop_callback(inplace_stop_callback&&) = delete; - inplace_stop_callback(const inplace_stop_callback&) = delete; - inplace_stop_callback& operator=(inplace_stop_callback&&) = delete; - inplace_stop_callback& operator=(const inplace_stop_callback&) = delete; - - private: - CallbackFn callback-fn; // exposition only - }; - - template - inplace_stop_callback(inplace_stop_token, CallbackFn) - -> inplace_stop_callback; -} -Mandates: CallbackFn satisfies both invocable and destructible. - -Remarks: For a type Initializer, if stoppable-callback-for is satisfied, then stoppable-callback-for is modeled. For an inplace_stop_callback object, the exposition-only callback-fn member is its associated callback function ([stoptoken.concepts]). - -33.3.10.2. Constructors and destructor [stopcallback.inplace.cons] -template - explicit inplace_stop_callback(inplace_stop_token st, Initializer&& init) - noexcept(is_nothrow_constructible_v); -Constraints: constructible_from is satisfied. - -Effects: Initializes callback-fn with std::forward(init) and executes a stoppable callback registration ([stoptoken.concepts]). - -~inplace_stop_callback(); -Effects: Executes a stoppable callback deregistration ([stoptoken.concepts]). - -Insert a new top-level clause - -34. Execution control library [exec] -34.1. General [exec.general] -This Clause describes components supporting execution of function objects [function.objects]. - -The following subclauses describe the requirements, concepts, and components for execution control primitives as summarized in Table 1. 
- -Table N: Execution control library summary [tab:execution.summary] -Subclause Header -[exec.sched] Schedulers -[exec.recv] Receivers -[exec.opstate] Operation states -[exec.snd] Senders -Table 2 shows the types of customization point objects [customization.point.object] used in the execution control library: - -Table N+1: Types of customization point objects in the execution control library [tab:execution.cpos] -Customization point object type Purpose Examples -core provide core execution functionality, and connection between core components e.g., connect, start -completion functions called by senders to announce the completion of the work (success, error, or cancellation) set_value, set_error, set_stopped -senders allow the specialization of the provided sender algorithms -sender factories (e.g., schedule, just, read_env) -sender adaptors (e.g., continues_on, then, let_value) -sender consumers (e.g., sync_wait) -queries allow querying different properties of objects -general queries (e.g., get_allocator, get_stop_token) -environment queries (e.g., get_scheduler, get_delegation_scheduler) -scheduler queries (e.g., get_forward_progress_guarantee) -sender attribute queries (e.g., get_completion_scheduler) -This clause makes use of the following exposition-only entities: - -For a subexpression expr, let MANDATE-NOTHROW(expr) be expression-equivalent to expr. - -Mandates: noexcept(expr) is true. - -namespace std { - template - concept movable-value = // exposition only - move_constructible> && - constructible_from, T> && - (!is_array_v>); -} -For function types F1 and F2 denoting R1(Args1...) and R2(Args2...) respectively, MATCHING-SIG(F1, F2) is true if and only if same_as is true. - -For a subexpression err, let Err be decltype((err)) and let AS-EXCEPT-PTR(err) be: - -err if decay_t denotes the type exception_ptr. - -Mandates: err != exception_ptr() is true. - -Otherwise, make_exception_ptr(system_error(err)) if decay_t denotes the type error_code. 
- -Otherwise, make_exception_ptr(err). - -34.2. Queries and queryables [exec.queryable] -34.2.1. General [exec.queryable.general] -A queryable object is a read-only collection of key/value pairs where each key is a customization point object known as a query object. A query is an invocation of a query object with a queryable object as its first argument and a (possibly empty) set of additional arguments. A query imposes syntactic and semantic requirements on its invocations. - -Let q be a query object, let args be a (possibly empty) pack of subexpressions, let env be a subexpression that refers to a queryable object o of type O, and let cenv be a subexpression referring to o such that decltype((cenv)) is const O&. The expression q(env, args...) is equal to ([concepts.equality]) the expression q(cenv, args...). - -The type of a query expression can not be void. - -The expression q(env, args...) is equality-preserving ([concepts.equality]) and does not modify the query object or the arguments. - -If the expression env.query(q, args...) is well-formed, then it is expression-equivalent to q(env, args...). - -Unless otherwise specified, the result of a query is valid as long as the queryable object is valid. - -34.2.2. queryable concept [exec.queryable.concept] -namespace std { - template - concept queryable = destructible; // exposition only -} -The exposition-only queryable concept specifies the constraints on the types of queryable objects. - -Let env be an object of type Env. The type Env models queryable if for each callable object q and a pack of subexpressions args, if requires { q(env, args...) } is true then q(env, args...) meets any semantic requirements imposed by q. - -34.3. Asynchronous operations [async.ops] -An execution resource is a program entity that manages a (possibly dynamic) set of execution agents ([thread.req.lockable.general]), which it uses to execute parallel work on behalf of callers. 
[Example 1: The currently active thread, a system-provided thread pool, and uses of an API associated with an external hardware accelerator are all examples of execution resources. -- end example] Execution resources execute asynchronous operations. An execution resource is either valid or invalid. - -An asynchronous operation is a distinct unit of program execution that: - -... is explicitly created. - -... can be explicitly started once at most. - -... once started, eventually completes exactly once with a (possibly empty) set of result datums and in exactly one of three dispositions: success, failure, or cancellation. - -A successful completion, also known as a value completion, can have an arbitrary number of result datums. - -A failure completion, also known as an error completion, has a single result datum. - -A cancellation completion, also known as a stopped completion, has no result datum. - -An asynchronous operation’s async result is its disposition and its (possibly empty) set of result datums. - -... can complete on a different execution resource than the execution resource on which it started. - -... can create and start other asynchronous operations called child operations. A child operation is an asynchronous operation that is created by the parent operation and, if started, completes before the parent operation completes. A parent operation is the asynchronous operation that created a particular child operation. - -An asynchronous operation can in fact execute synchronously; that is, it can complete during the execution of its start operation on the thread of execution that started it. - -An asynchronous operation has associated state known as its operation state. - -An asynchronous operation has an associated environment. An environment is a queryable object ([exec.queryable]) representing the execution-time properties of the operation’s caller. The caller of an asynchronous operation is its parent operation or the function that created it. 
An asynchronous operation’s operation state owns the operation’s environment. - -An asynchronous operation has an associated receiver. A receiver is an aggregation of three handlers for the three asynchronous completion dispositions: a value completion handler for a value completion, an error completion handler for an error completion, and a stopped completion handler for a stopped completion. A receiver has an associated environment. An asynchronous operation’s operation state owns the operation’s receiver. The environment of an asynchronous operation is equal to its receiver’s environment. - -For each completion disposition, there is a completion function. A completion function is a customization point object ([customization.point.object]) that accepts an asynchronous operation’s receiver as the first argument and the result datums of the asynchronous operation as additional arguments. The value completion function invokes the receiver’s value completion handler with the value result datums; likewise for the error completion function and the stopped completion function. A completion function has an associated type known as its completion tag that is the unqualified type of the completion function. A valid invocation of a completion function is called a completion operation. - -The lifetime of an asynchronous operation, also known as the operation’s async lifetime, begins when its start operation begins executing and ends when its completion operation begins executing. If the lifetime of an asynchronous operation’s associated operation state ends before the lifetime of the asynchronous operation, the behavior is undefined. After an asynchronous operation executes a completion operation, its associated operation state is invalid. Accessing any part of an invalid operation state is undefined behavior. - -An asynchronous operation shall not execute a completion operation before its start operation has begun executing. 
After its start operation has begun executing, exactly one completion operation shall execute. The lifetime of an asynchronous operation’s operation state can end during the execution of the completion operation. - -A sender is a factory for one or more asynchronous operations. Connecting a sender and a receiver creates an asynchronous operation. The asynchronous operation’s associated receiver is equal to the receiver used to create it, and its associated environment is equal to the environment associated with the receiver used to create it. The lifetime of an asynchronous operation’s associated operation state does not depend on the lifetimes of either the sender or the receiver from which it was created. A sender is started when it is connected to a receiver and the resulting asynchronous operation is started. A sender’s async result is the async result of the asynchronous operation created by connecting it to a receiver. A sender sends its results by way of the asynchronous operation(s) it produces, and a receiver receives those results. A sender is either valid or invalid; it becomes invalid when its parent sender (see below) becomes invalid. - -A scheduler is an abstraction of an execution resource with a uniform, generic interface for scheduling work onto that resource. It is a factory for senders whose asynchronous operations execute value completion operations on an execution agent belonging to the scheduler’s associated execution resource. A schedule-expression obtains such a sender from a scheduler. A schedule sender is the result of a schedule expression. On success, an asynchronous operation produced by a schedule sender executes a value completion operation with an empty set of result datums. Multiple schedulers can refer to the same execution resource. A scheduler can be valid or invalid. 
A scheduler becomes invalid when the execution resource to which it refers becomes invalid, as do any schedule senders obtained from the scheduler, and any operation states obtained from those senders. - -An asynchronous operation has one or more associated completion schedulers for each of its possible dispositions. A completion scheduler is a scheduler whose associated execution resource is used to execute a completion operation for an asynchronous operation. A value completion scheduler is a scheduler on which an asynchronous operation’s value completion operation can execute. Likewise for error completion schedulers and stopped completion schedulers. - -A sender has an associated queryable object ([exec.queryable]) known as its attributes that describes various characteristics of the sender and of the asynchronous operation(s) it produces. For each disposition, there is a query object for reading the associated completion scheduler from a sender’s attributes; i.e., a value completion scheduler query object for reading a sender’s value completion scheduler, etc. If a completion scheduler query is well-formed, the returned completion scheduler is unique for that disposition for any asynchronous operation the sender creates. A schedule sender is required to have a value completion scheduler attribute whose value is equal to the scheduler that produced the schedule sender. - -A completion signature is a function type that describes a completion operation. An asynchronous operation has a finite set of possible completion signatures corresponding to the completion operations that the asynchronous operation potentially evaluates ([basic.def.odr]). For a completion function set, receiver rcvr, and pack of arguments args, let c be the completion operation set(rcvr, args...), and let F be the function type decltype(auto(set))(decltype((args))...). A completion signature Sig is associated with c if and only if MATCHING-SIG(Sig, F) is true ([exec.general]). 
Together, a sender type and an environment type Env determine the set of completion signatures of an asynchronous operation that results from connecting the sender with a receiver that has an environment of type Env. The type of the receiver does not affect an asynchronous operation’s completion signatures, only the type of the receiver’s environment. - -A sender algorithm is a function that takes and/or returns a sender. There are three categories of sender algorithms: - -A sender factory is a function that takes non-senders as arguments and that returns a sender. - -A sender adaptor is a function that constructs and returns a parent sender from a set of one or more child senders and a (possibly empty) set of additional arguments. An asynchronous operation created by a parent sender is a parent operation to the child operations created by the child senders. - -A sender consumer is a function that takes one or more senders and a (possibly empty) set of additional arguments, and whose return type is not the type of a sender. - -34.4. 
Header synopsis [exec.syn] -namespace std { - // [exec.general], helper concepts - template - concept movable-value = see below; // exposition only - - template - concept decays-to = same_as, To>; // exposition only - - template - concept class-type = decays-to && is_class_v; // exposition only - - // [exec.queryable], queryable objects - template - concept queryable = see above; // exposition only - - // [exec.queries], queries - struct forwarding_query_t { see below }; - struct get_allocator_t { see below }; - struct get_stop_token_t { see below }; - - inline constexpr forwarding_query_t forwarding_query{}; - inline constexpr get_allocator_t get_allocator{}; - inline constexpr get_stop_token_t get_stop_token{}; - - template - using stop_token_of_t = - remove_cvref_t()))>; - - template - concept forwarding-query = // exposition only - forwarding_query(T{}); -} - -namespace std::execution { - // [exec.queries], queries - enum class forward_progress_guarantee { - concurrent, - parallel, - weakly_parallel - }; - struct get_domain_t { see below }; - struct get_scheduler_t { see below }; - struct get_delegation_scheduler_t { see below }; - struct get_forward_progress_guarantee_t { see below }; - template - struct get_completion_scheduler_t { see below }; - - inline constexpr get_domain_t get_domain{}; - inline constexpr get_scheduler_t get_scheduler{}; - inline constexpr get_delegation_scheduler_t get_delegation_scheduler{}; - inline constexpr get_forward_progress_guarantee_t get_forward_progress_guarantee{}; - template - inline constexpr get_completion_scheduler_t get_completion_scheduler{}; - - struct empty_env {}; - struct get_env_t { see below }; - inline constexpr get_env_t get_env{}; - - template - using env_of_t = decltype(get_env(declval())); - - // [exec.domain.default], execution domains - struct default_domain; - - // [exec.sched], schedulers - struct scheduler_t {}; - - template - concept scheduler = see below; - - // [exec.recv], receivers - struct 
receiver_t {}; - - template - concept receiver = see below; - - template - concept receiver_of = see below; - - struct set_value_t { see below }; - struct set_error_t { see below }; - struct set_stopped_t { see below }; - - inline constexpr set_value_t set_value{}; - inline constexpr set_error_t set_error{}; - inline constexpr set_stopped_t set_stopped{}; - - // [exec.opstate], operation states - struct operation_state_t {}; - - template - concept operation_state = see below; - - struct start_t { see below }; - inline constexpr start_t start{}; - - // [exec.snd], senders - struct sender_t {}; - - template - concept sender = see below; - - template - concept sender_in = see below; - - template - concept sender_to = see below; - - template - struct type-list; // exposition only - - // [exec.getcomplsigs], completion signatures - struct get_completion_signatures_t { see below }; - inline constexpr get_completion_signatures_t get_completion_signatures {}; - - template - requires sender_in - using completion_signatures_of_t = call-result-t; - - template - using decayed-tuple = tuple...>; // exposition only - - template - using variant-or-empty = see below; // exposition only - - template class Tuple = decayed-tuple, - template class Variant = variant-or-empty> - requires sender_in - using value_types_of_t = see below; - - template class Variant = variant-or-empty> - requires sender_in - using error_types_of_t = see below; - - template - requires sender_in - inline constexpr bool sends_stopped = see below; - - template - using single-sender-value-type = see below; // exposition only - - template - concept single-sender = see below; // exposition only - - template - using tag_of_t = see below; - - // [exec.snd.transform], sender transformations - template - requires (sizeof...(Env) <= 1) - constexpr sender decltype(auto) transform_sender( - Domain dom, Sndr&& sndr, const Env&... 
env) noexcept(see below); - - // [exec.snd.transform.env], environment transformations - template - constexpr queryable decltype(auto) transform_env( - Domain dom, Sndr&& sndr, Env&& env) noexcept; - - // [exec.snd.apply], sender algorithm application - template - constexpr decltype(auto) apply_sender( - Domain dom, Tag, Sndr&& sndr, Args&&... args) noexcept(see below); - - // [exec.connect], the connect sender algorithm - struct connect_t { see below }; - inline constexpr connect_t connect{}; - - template - using connect_result_t = - decltype(connect(declval(), declval())); - - // [exec.factories], sender factories - struct just_t { see below }; - struct just_error_t { see below }; - struct just_stopped_t { see below }; - struct schedule_t { see below }; - - inline constexpr just_t just{}; - inline constexpr just_error_t just_error{}; - inline constexpr just_stopped_t just_stopped{}; - inline constexpr schedule_t schedule{}; - inline constexpr unspecified read{}; - - template - using schedule_result_t = decltype(schedule(declval())); - - // [exec.adapt], sender adaptors - template - struct sender_adaptor_closure { }; - - struct starts_on_t { see below }; - struct continues_on_t { see below }; - struct on_t { see below }; - struct schedule_from_t { see below }; - struct then_t { see below }; - struct upon_error_t { see below }; - struct upon_stopped_t { see below }; - struct let_value_t { see below }; - struct let_error_t { see below }; - struct let_stopped_t { see below }; - struct bulk_t { see below }; - struct split_t { see below }; - struct when_all_t { see below }; - struct when_all_with_variant_t { see below }; - struct into_variant_t { see below }; - struct stopped_as_optional_t { see below }; - struct stopped_as_error_t { see below }; - - inline constexpr starts_on_t starts_on{}; - inline constexpr continues_on_t continues_on{}; - inline constexpr on_t on{}; - inline constexpr schedule_from_t schedule_from{}; - inline constexpr then_t then{}; - inline 
constexpr upon_error_t upon_error{}; - inline constexpr upon_stopped_t upon_stopped{}; - inline constexpr let_value_t let_value{}; - inline constexpr let_error_t let_error{}; - inline constexpr let_stopped_t let_stopped{}; - inline constexpr bulk_t bulk{}; - inline constexpr split_t split{}; - inline constexpr when_all_t when_all{}; - inline constexpr when_all_with_variant_t when_all_with_variant{}; - inline constexpr into_variant_t into_variant{}; - inline constexpr stopped_as_optional_t stopped_as_optional{}; - inline constexpr stopped_as_error_t stopped_as_error{}; - - // [exec.utils], sender and receiver utilities - // [exec.utils.cmplsigs] - template - concept completion-signature = // exposition only - see below; - - template - struct completion_signatures {}; - - template // exposition only - concept valid-completion-signatures = see below; - - // [exec.utils.tfxcmplsigs] - template< - valid-completion-signatures InputSignatures, - valid-completion-signatures AdditionalSignatures = completion_signatures<>, - template class SetValue = see below, - template class SetError = see below, - valid-completion-signatures SetStopped = completion_signatures> - using transform_completion_signatures = completion_signatures; - - template< - sender Sndr, - class Env = empty_env, - valid-completion-signatures AdditionalSignatures = completion_signatures<>, - template class SetValue = see below, - template class SetError = see below, - valid-completion-signatures SetStopped = completion_signatures> - requires sender_in - using transform_completion_signatures_of = - transform_completion_signatures< - completion_signatures_of_t, - AdditionalSignatures, SetValue, SetError, SetStopped>; - - // [exec.ctx], execution resources - // [exec.run.loop], run_loop - class run_loop; -} - -namespace std::this_thread { - // [exec.consumers], consumers - struct sync_wait_t { see below }; - struct sync_wait_with_variant_t { see below }; - - inline constexpr sync_wait_t sync_wait{}; - inline 
constexpr sync_wait_with_variant_t sync_wait_with_variant{}; -} - -namespace std::execution { - // [exec.as.awaitable] - struct as_awaitable_t { see below }; - inline constexpr as_awaitable_t as_awaitable{}; - - // [exec.with.awaitable.senders] - template - struct with_awaitable_senders; -} -The exposition-only type variant-or-empty is defined as follows: - -If sizeof...(Ts) is greater than zero, variant-or-empty denotes variant where Us... is the pack decay_t... with duplicate types removed. - -Otherwise, variant-or-empty denotes the exposition-only class type: - -namespace std::execution { - struct empty-variant { // exposition only - empty-variant() = delete; - }; -} -For types Sndr and Env, single-sender-value-type is an alias for: - -value_types_of_t if that type is well-formed, - -Otherwise, void if value_types_of_t is variant> or variant<>, - -Otherwise, value_types_of_t if that type is well-formed, - -Otherwise, single-sender-value-type is ill-formed. - -The exposition-only concept single-sender is defined as follows: - -namespace std::execution { - template - concept single-sender = - sender_in && - requires { - typename single-sender-value-type; - }; -} -34.5. Queries [exec.queries] -34.5.1. forwarding_query [exec.fwd.env] -forwarding_query asks a query object whether it should be forwarded through queryable adaptors. - -The name forwarding_query denotes a query object. For some query object q of type Q, forwarding_query(q) is expression-equivalent to: - -MANDATE-NOTHROW(q.query(forwarding_query)) if that expression is well-formed. - -Mandates: The expression above has type bool and is a core constant expression if q is a core constant expression. - -Otherwise, true if derived_from is true. - -Otherwise, false. - -34.5.2. get_allocator [exec.get.allocator] -get_allocator asks a queryable object for its associated allocator. - -The name get_allocator denotes a query object. 
For a subexpression env, get_allocator(env) is expression-equivalent to MANDATE-NOTHROW(as_const(env).query(get_allocator)). - -Mandates: If the expression above is well-formed, its type satisfies simple-allocator ([allocator.requirements.general]). - -forwarding_query(get_allocator) is a core constant expression and has value true. - -34.5.3. get_stop_token [exec.get.stop.token] -get_stop_token asks a queryable object for an associated stop token. - -The name get_stop_token denotes a query object. For a subexpression env, get_stop_token(env) is expression-equivalent to: - -MANDATE-NOTHROW(as_const(env).query(get_stop_token)) if that expression is well-formed. - -Mandates: The type of the expression above satisfies stoppable_token. - -Otherwise, never_stop_token{}. - -forwarding_query(get_stop_token) is a core constant expression and has value true. - -34.5.4. execution::get_env [exec.get.env] -execution::get_env is a customization point object. For a subexpression o, execution::get_env(o) is expression-equivalent to: - -MANDATE-NOTHROW(as_const(o).get_env()) if that expression is well-formed. - -Mandates: The type of the expression above satisfies queryable ([exec.queryable]). - -Otherwise, empty_env{}. - -The value of get_env(o) shall be valid while o is valid. - -When passed a sender object, get_env returns the sender’s associated attributes. When passed a receiver, get_env returns the receiver’s associated execution environment. - -34.5.5. execution::get_domain [exec.get.domain] -get_domain asks a queryable object for its associated execution domain tag. - -The name get_domain denotes a query object. For a subexpression env, get_domain(env) is expression-equivalent to MANDATE-NOTHROW(as_const(env).query(get_domain)). - -forwarding_query(execution::get_domain) is a core constant expression and has value true. - -34.5.6. execution::get_scheduler [exec.get.scheduler] -get_scheduler asks a queryable object for its associated scheduler. 
- -The name get_scheduler denotes a query object. For a subexpression env, get_scheduler(env) is expression-equivalent to MANDATE-NOTHROW(as_const(env).query(get_scheduler)). - -Mandates: If the expression above is well-formed, its type satisfies scheduler. - -forwarding_query(execution::get_scheduler) is a core constant expression and has value true. - -34.5.7. execution::get_delegation_scheduler [exec.get.delegation.scheduler] -get_delegation_scheduler asks a queryable object for a scheduler that can be used to delegate work to for the purpose of forward progress delegation ([intro.progress]). - -The name get_delegation_scheduler denotes a query object. For a subexpression env, get_delegation_scheduler(env) is expression-equivalent to MANDATE-NOTHROW(as_const(env).query(get_delegation_scheduler)). - -Mandates: If the expression above is well-formed, its type satisfies scheduler. - -forwarding_query(execution::get_delegation_scheduler) is a core constant expression and has value true. - -34.5.8. execution::get_forward_progress_guarantee [exec.get.forward.progress.guarantee] -namespace std::execution { - enum class forward_progress_guarantee { - concurrent, - parallel, - weakly_parallel - }; -} -get_forward_progress_guarantee asks a scheduler about the forward progress guarantee of execution agents created by that scheduler’s associated execution resource ([intro.progress]). - -The name get_forward_progress_guarantee denotes a query object. For a subexpression sch, let Sch be decltype((sch)). If Sch does not satisfy scheduler, get_forward_progress_guarantee is ill-formed. Otherwise, get_forward_progress_guarantee(sch) is expression-equivalent to: - -MANDATE-NOTHROW(as_const(sch).query(get_forward_progress_guarantee)), if that expression is well-formed. - -Mandates: The type of the expression above is forward_progress_guarantee. - -Otherwise, forward_progress_guarantee::weakly_parallel. 
- -If get_forward_progress_guarantee(sch) for some scheduler sch returns forward_progress_guarantee::concurrent, all execution agents created by that scheduler’s associated execution resource shall provide the concurrent forward progress guarantee. If it returns forward_progress_guarantee::parallel, all such execution agents shall provide at least the parallel forward progress guarantee. - -34.5.9. execution::get_completion_scheduler [exec.completion.scheduler] -get_completion_scheduler obtains the completion scheduler associated with a completion tag from a sender’s attributes. - -The name get_completion_scheduler denotes a query object template. For a subexpression q, the expression get_completion_scheduler(q) is ill-formed if completion-tag is not one of set_value_t, set_error_t, or set_stopped_t. Otherwise, get_completion_scheduler(q) is expression-equivalent to MANDATE-NOTHROW(as_const(q).query(get_completion_scheduler)). - -Mandates: If the expression above is well-formed, its type satisfies scheduler. - -Let completion-fn be a completion function ([async.ops]); let completion-tag be the associated completion tag of completion-fn; let args be a pack of subexpressions; and let sndr be a subexpression such that sender is true and get_completion_scheduler(get_env(sndr)) is well-formed and denotes a scheduler sch. If an asynchronous operation created by connecting sndr with a receiver rcvr causes the evaluation of completion-fn(rcvr, args...), the behavior is undefined unless the evaluation happens on an execution agent that belongs to sch's associated execution resource. - -The expression forwarding_query(get_completion_scheduler) is a core constant expression and has value true. - -34.6. Schedulers [exec.sched] -The scheduler concept defines the requirements of a scheduler type ([async.ops]). schedule is a customization point object that accepts a scheduler. A valid invocation of schedule is a schedule-expression. 
- -namespace std::execution { - template - concept scheduler = - derived_from::scheduler_concept, scheduler_t> && - queryable && - requires(Sch&& sch) { - { schedule(std::forward(sch)) } -> sender; - { auto(get_completion_scheduler( - get_env(schedule(std::forward(sch))))) } - -> same_as>; - } && - equality_comparable> && - copy_constructible>; -} -Let Sch be the type of a scheduler and let Env be the type of an execution environment for which sender_in, Env> is satisfied. Then sender-in-of, Env> shall be modeled. - -None of a scheduler’s copy constructor, destructor, equality comparison, or swap member functions shall exit via an exception. None of these member functions, nor a scheduler type’s schedule function, shall introduce data races as a result of potentially concurrent ([intro.races]) invocations of those functions from different threads. - -For any two values sch1 and sch2 of some scheduler type Sch, sch1 == sch2 shall return true only if both sch1 and sch2 share the same associated execution resource. - -For a given scheduler expression sch, the expression get_completion_scheduler(get_env(schedule(sch))) shall compare equal to sch. - -For a given scheduler expression sch, if the expression get_domain(sch) is well-formed, then the expression get_domain(get_env(schedule(sch))) is also well-formed and has the same type. - -A scheduler type’s destructor shall not block pending completion of any receivers connected to the sender objects returned from schedule. The ability to wait for completion of submitted function objects can be provided by the associated execution resource of the scheduler. - -34.7. Receivers [exec.recv] -34.7.1. Receiver concepts [exec.recv.concepts] -A receiver represents the continuation of an asynchronous operation. The receiver concept defines the requirements for a receiver type ([async.ops]). 
The receiver_of concept defines the requirements for a receiver type that is usable as the first argument of a set of completion operations corresponding to a set of completion signatures. The get_env customization point object is used to access a receiver’s associated environment. - -namespace std::execution { - template - concept receiver = - derived_from::receiver_concept, receiver_t> && - requires(const remove_cvref_t& rcvr) { - { get_env(rcvr) } -> queryable; - } && - move_constructible> && // rvalues are movable, and - constructible_from, Rcvr>; // lvalues are copyable - - template - concept valid-completion-for = // exposition only - requires (Signature* sig) { - [](Tag(*)(Args...)) - requires callable, Args...> - {}(sig); - }; - - template - concept has-completions = // exposition only - requires (Completions* completions) { - []...Sigs>(completion_signatures*) - {}(completions); - }; - - template - concept receiver_of = - receiver && has-completions; -} -Class types that are marked final do not model the receiver concept. - -Let rcvr be a receiver and let op_state be an operation state associated with an asynchronous operation created by connecting rcvr with a sender. Let token be a stop token equal to get_stop_token(get_env(rcvr)). token shall remain valid for the duration of the asynchronous operation’s lifetime ([async.ops]). This means that, unless it knows about further guarantees provided by the type of rcvr, the implementation of op_state can not use token after it executes a completion operation. This also implies that any stop callbacks registered on token must be destroyed before the invocation of the completion operation. - -34.7.2. execution::set_value [exec.set.value] -set_value is a value completion function ([async.ops]). Its associated completion tag is set_value_t. The expression set_value(rcvr, vs...) for a subexpression rcvr and pack of subexpressions vs is ill-formed if rcvr is an lvalue or an rvalue of const type. 
Otherwise, it is expression-equivalent to MANDATE-NOTHROW(rcvr.set_value(vs...)). - -34.7.3. execution::set_error [exec.set.error] -set_error is an error completion function ([async.ops]). Its associated completion tag is set_error_t. The expression set_error(rcvr, err) for some subexpressions rcvr and err is ill-formed if rcvr is an lvalue or an rvalue of const type. Otherwise, it is expression-equivalent to MANDATE-NOTHROW(rcvr.set_error(err)). - -34.7.4. execution::set_stopped [exec.set.stopped] -set_stopped is a stopped completion function ([async.ops]). Its associated completion tag is set_stopped_t. The expression set_stopped(rcvr) for a subexpression rcvr is ill-formed if rcvr is an lvalue or an rvalue of const type. Otherwise, it is expression-equivalent to MANDATE-NOTHROW(rcvr.set_stopped()). - -34.8. Operation states [exec.opstate] -The operation_state concept defines the requirements of an operation state type ([async.ops]). - -namespace std::execution { - template - concept operation_state = - derived_from && - is_object_v && - requires (O& o) { - { start(o) } noexcept; - }; -} -If an operation_state object is destroyed during the lifetime of its asynchronous operation ([async.ops]), the behavior is undefined. The operation_state concept does not impose requirements on any operations other than destruction and start, including copy and move operations. Invoking any such operation on an object whose type models operation_state can lead to undefined behavior. - -The program is ill-formed if it performs a copy or move construction or assignment operation on an operation state object created by connecting a library-provided sender. - -34.8.1. execution::start [exec.opstate.start] -The name start denotes a customization point object that starts ([async.ops]) the asynchronous operation associated with the operation state object. For a subexpression op, the expression start(op) is ill-formed if op is an rvalue.
Otherwise, it is expression-equivalent to MANDATE-NOTHROW(op.start()). - -If op.start() does not start ([async.ops]) the asynchronous operation associated with the operation state op, the behavior of calling start(op) is undefined. - -34.9. Senders [exec.snd] -34.9.1. General [exec.snd.general] -For the purposes of this subclause, a sender is an object whose type satisfies the sender concept ([async.ops]). - -Subclauses [exec.factories] and [exec.adapt] define customizable algorithms that return senders. Each algorithm has a default implementation. Let sndr be the result of an invocation of such an algorithm or an object equal to the result ([concepts.equality]), and let Sndr be decltype((sndr)). Let rcvr be a receiver of type Rcvr with associated environment env of type Env such that sender_to is true. For the default implementation of the algorithm that produced sndr, connecting sndr to rcvr and starting the resulting operation state ([async.ops]) necessarily results in the potential evaluation ([basic.def.odr]) of a set of completion operations whose first argument is a subexpression equal to rcvr. Let Sigs be a pack of completion signatures corresponding to this set of completion operations. Then the type of the expression get_completion_signatures(sndr, env) is a specialization of the class template completion_signatures ([exec.utils.cmplsigs]), the set of whose template arguments is Sigs. If a user-provided implementation of the algorithm that produced sndr is selected instead of the default, any completion signature that is in the set of types denoted by completion_signatures_of_t and that is not part of Sigs shall correspond to error or stopped completion operations, unless otherwise specified. - -This subclause makes use of the following exposition-only entities. 
- -For a queryable object env, FWD-ENV(env) is an expression whose type satisfies queryable such that for a query object q and a pack of subexpressions as, the expression FWD-ENV(env).query(q, as...) is ill-formed if forwarding_query(q) is false; otherwise, it is expression-equivalent to env.query(q, as...). - -For a query object q and a subexpression v, MAKE-ENV(q, v) is an expression env whose type satisfies queryable such that the result of env.query(q) has a value equal to v ([concepts.equality]). Unless otherwise stated, the object to which env.query(q) refers remains valid while env remains valid. - -For two queryable objects env1 and env2, a query object q and a pack of subexpressions as, JOIN-ENV(env1, env2) is an expression env3 whose type satisfies queryable such that env3.query(q, as...) is expression-equivalent to: - -env1.query(q, as...) if that expression is well-formed, - -otherwise, env2.query(q, as...) if that expression is well-formed, - -otherwise, env3.query(q, as...) is ill-formed. - -The results of FWD-ENV, MAKE-ENV, and JOIN-ENV can be context-dependent; i.e., they can evaluate to expressions with different types and value categories in different contexts for the same arguments. - -For a scheduler sch, SCHED-ATTRS(sch) is an expression o1 whose type satisfies queryable such that o1.query(get_completion_scheduler) is an expression with the same type and value as sch where Tag is one of set_value_t or set_stopped_t, and such that o1.query(get_domain) is expression-equivalent to sch.query(get_domain). SCHED-ENV(sch) is an expression o2 whose type satisfies queryable such that o2.query(get_scheduler) is a prvalue with the same type and value as sch, and such that o2.query(get_domain) is expression-equivalent to sch.query(get_domain). - -For two subexpressions rcvr and expr, SET-VALUE(rcvr, expr) is expression-equivalent to (expr, set_value(std::move(rcvr))) if the type of expr is void; otherwise, set_value(std::move(rcvr), expr).
TRY-EVAL(rcvr, expr) is equivalent to: - -try { - expr; -} catch(...) { - set_error(std::move(rcvr), current_exception()); -} -if expr is potentially-throwing; otherwise, expr. TRY-SET-VALUE(rcvr, expr) is TRY-EVAL(rcvr, SET-VALUE(rcvr, expr)) except that rcvr is evaluated only once. - -template - constexpr auto completion-domain(const Sndr& sndr) noexcept; -COMPL-DOMAIN(T) is the type of the expression get_domain(get_completion_scheduler(get_env(sndr))). - -Effects: If all of the types COMPL-DOMAIN(set_value_t), COMPL-DOMAIN(set_error_t), and COMPL-DOMAIN(set_stopped_t) are ill-formed, completion-domain(sndr) is a default-constructed prvalue of type Default. Otherwise, if they all share a common type ([meta.trans.other]) (ignoring those types that are ill-formed), then completion-domain(sndr) is a default-constructed prvalue of that type. Otherwise, completion-domain(sndr) is ill-formed. - -template - constexpr decltype(auto) query-with-default( - Tag, const Env& env, Default&& value) noexcept(see below); -Let e be the expression Tag()(env) if that expression is well-formed; otherwise, it is static_cast(std::forward(value)). - -Returns: e. - -Remarks: The expression in the noexcept clause is noexcept(e). 
- -template - constexpr auto get-domain-early(const Sndr& sndr) noexcept; -Effects: Equivalent to: return Domain(); where Domain is the decayed type of the first of the following expressions that is well-formed: - -get_domain(get_env(sndr)) - -completion-domain(sndr) - -default_domain() - -template - constexpr auto get-domain-late(const Sndr& sndr, const Env& env) noexcept; -Effects: Equivalent to: - -If sender-for is true, then return Domain(); where Domain is the type of the following expression: - -[] { - auto [_, sch, _] = sndr; - return query-with-default(get_domain, sch, default_domain()); -}(); -The continues_on algorithm works in tandem with schedule_from ([exec.schedule.from]) to give scheduler authors a way to customize both how to transition onto (continues_on) and off of (schedule_from) a given execution context. Thus, continues_on ignores the domain of the predecessor and uses the domain of the destination scheduler to select a customization, a property that is unique to continues_on. That is why it is given special treatment here. - -Otherwise, return Domain(); where Domain is the first of the following expressions that is well-formed and whose type is not void: - -get_domain(get_env(sndr)) - -completion-domain(sndr) - -get_domain(env) - -get_domain(get_scheduler(env)) - -default_domain(). - -template - requires is_nothrow_move_constructible_v -struct emplace-from { // exposition only - Fun fun; // exposition only - using type = call-result-t; - - constexpr operator type() && noexcept(nothrow-callable) { - return std::move(fun)(); - } - - constexpr type operator()() && noexcept(nothrow-callable) { - return std::move(fun)(); - } -}; -emplace-from is used to emplace non-movable types into tuple, optional, variant, and similar types.
- -struct on-stop-request { // exposition only - inplace_stop_source& stop-src; // exposition only - void operator()() noexcept { stop-src.request_stop(); } -}; -template -struct product-type { // exposition only - T0 t0; // exposition only - T1 t1; // exposition only - ... - Tn tn; // exposition only - - template - constexpr decltype(auto) get(this Self&& self) noexcept; // exposition only - - template - constexpr decltype(auto) apply(this Self&& self, Fn&& fn) // exposition only - noexcept(see below); -}; -product-type is presented here in pseudo-code form for the sake of exposition. It can be approximated in standard C++ with a tuple-like implementation that takes care to keep the type an aggregate that can be used as the initializer of a structured binding declaration. - -An expression of type product-type is usable as the initializer of a structured binding declaration [dcl.struct.bind]. - -template -constexpr decltype(auto) get(this Self&& self) noexcept; -Effects: Equivalent to: - -auto& [...ts] = self; -return std::forward_like(ts...[I]); -template -constexpr decltype(auto) apply(this Self&& self, Fn&& fn) noexcept(see below); -Effects: Equivalent to: - -auto& [...ts] = self; -return std::forward(fn)(std::forward_like(ts)...); -Requires: The expression in the return statement above is well-formed. - -Remarks: The expression in the noexcept clause is true if the return statement above is not potentially throwing; otherwise, false. - -template - constexpr auto make-sender(Tag tag, Data&& data, Child&&... child); -Mandates: The following expressions are true: - -semiregular - -movable-value - -(sender &&...) 
- -Returns: A prvalue of type basic-sender, decay_t...> that has been direct-list-initialized with the forwarded arguments, where basic-sender is the following exposition-only class template except as noted below: - -namespace std::execution { - template - concept completion-tag = // exposition only - same_as || same_as || same_as; - - template class T, class... Args> - concept valid-specialization = requires { typename T; }; // exposition only - - struct default-impls { // exposition only - static constexpr auto get-attrs = see below; - static constexpr auto get-env = see below; - static constexpr auto get-state = see below; - static constexpr auto start = see below; - static constexpr auto complete = see below; - }; - - template - struct impls-for : default-impls {}; // exposition only - - template // exposition only - using state-type = decay_t>::get-state), Sndr, Rcvr&>>; - - template // exposition only - using env-type = call-result-t< - decltype(impls-for>::get-env), Index, - state-type&, const Rcvr&>; - - template - using child-type = decltype(declval().template get()); // exposition only - - template - using indices-for = remove_reference_t::indices-for; // exposition only - - template - struct basic-state { // exposition only - basic-state(Sndr&& sndr, Rcvr&& rcvr) noexcept(see below) - : rcvr(std::move(rcvr)) - , state(impls-for>::get-state(std::forward(sndr), rcvr)) { } - - Rcvr rcvr; // exposition only - state-type state; // exposition only - }; - - template - requires valid-specialization - struct basic-receiver { // exposition only - using receiver_concept = receiver_t; - - using tag-t = tag_of_t; // exposition only - using state-t = state-type; // exposition only - static constexpr const auto& complete = impls-for::complete; // exposition only - - template - requires callable - void set_value(Args&&... 
args) && noexcept { - complete(Index(), op->state, op->rcvr, set_value_t(), std::forward(args)...); - } - - template - requires callable - void set_error(Error&& err) && noexcept { - complete(Index(), op->state, op->rcvr, set_error_t(), std::forward(err)); - } - - void set_stopped() && noexcept - requires callable { - complete(Index(), op->state, op->rcvr, set_stopped_t()); - } - - auto get_env() const noexcept -> env-type { - return impls-for::get-env(Index(), op->state, op->rcvr); - } - - basic-state* op; // exposition only - }; - - constexpr auto connect-all = see below; // exposition only - - template - using connect-all-result = call-result-t< // exposition only - decltype(connect-all), basic-state*, Sndr, indices-for>; - - template - requires valid-specialization && - valid-specialization - struct basic-operation : basic-state { // exposition only - using operation_state_concept = operation_state_t; - using tag-t = tag_of_t; // exposition only - - connect-all-result inner-ops; // exposition only - - basic-operation(Sndr&& sndr, Rcvr&& rcvr) noexcept(see below) // exposition only - : basic-state(std::forward(sndr), std::move(rcvr)) - , inner-ops(connect-all(this, std::forward(sndr), indices-for())) - {} - - void start() & noexcept { - auto& [...ops] = inner-ops; - impls-for::start(this->state, this->rcvr, ops...); - } - }; - - template - using completion-signatures-for = see below; // exposition only - - template - struct basic-sender : product-type { // exposition only - using sender_concept = sender_t; - using indices-for = index_sequence_for; // exposition only - - decltype(auto) get_env() const noexcept { - auto& [_, data, ...child] = *this; - return impls-for::get-attrs(data, child...); - } - - template Self, receiver Rcvr> - auto connect(this Self&& self, Rcvr rcvr) noexcept(see below) - -> basic-operation { - return {std::forward(self), std::move(rcvr)}; - } - - template Self, class Env> - auto get_completion_signatures(this Self&& self, Env&& env) 
noexcept - -> completion-signatures-for { - return {}; - } - }; -} -Remarks: The default template argument for the Data template parameter denotes an unspecified empty trivially copyable class type that models semiregular. - -It is unspecified whether a specialization of basic-sender is an aggregate. - -An expression of type basic-sender is usable as the initializer of a structured binding declaration [dcl.struct.bind]. - -The expression in the noexcept clause of the constructor of basic-state is: - -is_nothrow_move_constructible_v && -nothrow-callable>::get-state), Sndr, Rcvr&> -The object connect-all is initialized with a callable object equivalent to the following lambda: - -[]( - basic-state* op, Sndr&& sndr, index_sequence) noexcept(see below) - -> decltype(auto) { - auto& [_, data, ...child] = sndr; - return product-type{connect( - std::forward_like(child), - basic-receiver>{op})...}; - } -Requires: The expression in the return statement is well-formed. - -Remarks: The expression in the noexcept clause is true if the return statement is not potentially throwing; otherwise, false. - -The expression in the noexcept clause of the constructor of basic-operation is: - -is_nothrow_constructible_v, Self, Rcvr> && -noexcept(connect-all(this, std::forward(sndr), indices-for())) -The expression in the noexcept clause of the connect member function of basic-sender is: - -is_nothrow_constructible_v, Self, Rcvr> -The member default-impls::get-attrs is initialized with a callable object equivalent to the following lambda: - -[](const auto&, const auto&... 
child) noexcept -> decltype(auto) { - if constexpr (sizeof...(child) == 1) - return (FWD-ENV(get_env(child)), ...); - else - return empty_env(); -} -The member default-impls::get-env is initialized with a callable object equivalent to the following lambda: - -[](auto, auto&, const auto& rcvr) noexcept -> decltype(auto) { - return FWD-ENV(get_env(rcvr)); -} -The member default-impls::get-state is initialized with a callable object equivalent to the following lambda: - -[](Sndr&& sndr, Rcvr& rcvr) noexcept -> decltype(auto) { - auto& [_, data, ...child] = sndr; - return std::forward_like(data); -} -The member default-impls::start is initialized with a callable object equivalent to the following lambda: - -[](auto&, auto&, auto&... ops) noexcept -> void { - (execution::start(ops), ...); -} -The member default-impls::complete is initialized with a callable object equivalent to the following lambda: - -[]( - Index, auto& state, Rcvr& rcvr, Tag, Args&&... args) noexcept - -> void requires callable { - // Mandates: Index::value == 0 - Tag()(std::move(rcvr), std::forward(args)...); -} -For a subexpression sndr let Sndr be decltype((sndr)). Let rcvr be a receiver with an associated environment of type Env such that sender_in is true. completion-signatures-for denotes a specialization of completion_signatures, the set of whose template arguments correspond to the set of completion operations that are potentially evaluated as a result of starting ([async.ops]) the operation state that results from connecting sndr and rcvr. When sender_in is false, the type denoted by completion-signatures-for, if any, is not a specialization of completion_signatures. - -Recommended practice: When sender_in is false, implementations are encouraged to use the type denoted by completion-signatures-for to communicate to users why. 
- -template - constexpr auto write-env(Sndr&& sndr, Env&& env); // exposition only -write-env is an exposition-only sender adaptor that, when connected with a receiver rcvr, connects the adapted sender with a receiver whose execution environment is the result of joining the queryable argument env to the result of get_env(rcvr). - -Let write-env-t be an exposition-only empty class type. - -Returns: make-sender(write-env-t(), std::forward(env), std::forward(sndr)). - -Remarks: The exposition-only class template impls-for ([exec.snd.general]) is specialized for write-env-t as follows: - -template<> -struct impls-for : default-impls { - static constexpr auto get-env = - [](auto, const auto& state, const auto& rcvr) noexcept { - return JOIN-ENV(state, get_env(rcvr)); - }; -}; -34.9.2. Sender concepts [exec.snd.concepts] -The sender concept defines the requirements for a sender type ([async.ops]). The sender_in concept defines the requirements for a sender type that can create asynchronous operations given an associated environment type. The sender_to concept defines the requirements for a sender type that can connect with a specific receiver type. The get_env customization point object is used to access a sender’s associated attributes. The connect customization point object is used to connect ([async.ops]) a sender and a receiver to produce an operation state. 
- -namespace std::execution { - template - concept valid-completion-signatures = see below; // exposition only - - template - concept is-sender = // exposition only - derived_from; - - template - concept enable-sender = // exposition only - is-sender || - is-awaitable>; // [exec.awaitables] - - template - concept sender = - bool(enable-sender>) && // atomic constraint ([temp.constr.atomic]) - requires (const remove_cvref_t& sndr) { - { get_env(sndr) } -> queryable; - } && - move_constructible> && // senders are movable and - constructible_from, Sndr>; // decay copyable - - template - concept sender_in = - sender && - queryable && - requires (Sndr&& sndr, Env&& env) { - { get_completion_signatures(std::forward(sndr), std::forward(env)) } - -> valid-completion-signatures; - }; - - template - concept sender_to = - sender_in> && - receiver_of>> && - requires (Sndr&& sndr, Rcvr&& rcvr) { - connect(std::forward(sndr), std::forward(rcvr)); - }; -} -Given a subexpression sndr, let Sndr be decltype((sndr)) and let rcvr be a receiver with an associated environment whose type is Env. A completion operation is a permissible completion for Sndr and Env if its completion signature appears in the argument list of the specialization of completion_signatures denoted by completion_signatures_of_t. Sndr and Env model sender_in if all the completion operations that are potentially evaluated by connecting sndr to rcvr and starting the resulting operation state are permissible completions for Sndr and Env. - -A type models the exposition-only concept valid-completion-signatures if it denotes a specialization of the completion_signatures class template. - -The exposition-only concepts sender-of and sender-in-of define the requirements for a sender type that completes with a given unique set of value result types. 
- -namespace std::execution { - template - using value-signature = set_value_t(As...); // exposition only - - template - concept sender-in-of = - sender_in && - MATCHING-SIG( // see [exec.general] - set_value_t(Values...), - value_types_of_t); - - template - concept sender-of = sender-in-of; -} -Let sndr be an expression such that decltype((sndr)) is Sndr. The type tag_of_t is as follows: - -If the declaration auto&& [tag, data, ...children] = sndr; would be well-formed, tag_of_t is an alias for decltype(auto(tag)). - -Otherwise, tag_of_t is ill-formed. - -Let sender-for be an exposition-only concept defined as follows: - -namespace std::execution { - template - concept sender-for = - sender && - same_as, Tag>; -} -For a type T, SET-VALUE-SIG(T) denotes the type set_value_t() if T is cv void; otherwise, it denotes the type set_value_t(T). - -Library-provided sender types: - -Always expose an overload of a member connect that accepts an rvalue sender. - -Only expose an overload of a member connect that accepts an lvalue sender if they model copy_constructible. - -34.9.3. Awaitable helpers [exec.awaitables] -The sender concepts recognize awaitables as senders. For [exec], an awaitable is an expression that would be well-formed as the operand of a co_await expression within a given context. - -For a subexpression c, let GET-AWAITER(c, p) be expression-equivalent to the series of transformations and conversions applied to c as the operand of an await-expression in a coroutine, resulting in lvalue e as described by [expr.await], where p is an lvalue referring to the coroutine’s promise, which has type Promise. This includes the invocation of the promise type’s await_transform member if any, the invocation of the operator co_await picked by overload resolution if any, and any necessary implicit conversions and materializations. - -I have opened cwg#250 to give these transformations a term-of-art so we can more easily refer to it here. 
- -Let is-awaitable be the following exposition-only concept: - -namespace std { - template - concept await-suspend-result = see below; // exposition only - - template - concept is-awaiter = // exposition only - requires (A& a, coroutine_handle h) { - a.await_ready() ? 1 : 0; - { a.await_suspend(h) } -> await-suspend-result; - a.await_resume(); - }; - - template - concept is-awaitable = - requires (C (*fc)() noexcept, Promise& p) { - { GET-AWAITER(fc(), p) } -> is-awaiter; - }; -} -await-suspend-result is true if and only if one of the following is true: - -T is void, or - -T is bool, or - -T is a specialization of coroutine_handle. - -For a subexpression c such that decltype((c)) is type C, and an lvalue p of type Promise, await-result-type denotes the type decltype(GET-AWAITER(c, p).await_resume()). - -Let with-await-transform be the exposition-only class template: - -namespace std::execution { - template - concept has-as-awaitable = // exposition only - requires (T&& t, Promise& p) { - { std::forward(t).as_awaitable(p) } -> is-awaitable; - }; - - template - struct with-await-transform { - template - T&& await_transform(T&& value) noexcept { - return std::forward(value); - } - - template T> - decltype(auto) await_transform(T&& value) - noexcept(noexcept(std::forward(value).as_awaitable(declval()))) { - return std::forward(value).as_awaitable(static_cast(*this)); - } - }; -} -Let env-promise be the exposition-only class template: - -namespace std::execution { - template - struct env-promise : with-await-transform> { - unspecified get_return_object() noexcept; - unspecified initial_suspend() noexcept; - unspecified final_suspend() noexcept; - void unhandled_exception() noexcept; - void return_void() noexcept; - coroutine_handle<> unhandled_stopped() noexcept; - - const Env& get_env() const noexcept; - }; -} -Specializations of env-promise are only used for the purpose of type computation; its members need not be defined. - -34.9.4. 
execution::default_domain [exec.domain.default] -namespace std::execution { - struct default_domain { - template - requires (sizeof...(Env) <= 1) - static constexpr sender decltype(auto) transform_sender(Sndr&& sndr, const Env&... env) - noexcept(see below); - - template - static constexpr queryable decltype(auto) transform_env(Sndr&& sndr, Env&& env) noexcept; - - template - static constexpr decltype(auto) apply_sender(Tag, Sndr&& sndr, Args&&... args) - noexcept(see below); - }; -} -34.9.4.1. Static members [exec.domain.default.statics] -template - requires (sizeof...(Env) <= 1) - constexpr sender decltype(auto) transform_sender(Sndr&& sndr, const Env&... env) - noexcept(see below); -Let e be the expression tag_of_t().transform_sender(std::forward(sndr), env...) if that expression is well-formed; otherwise, std::forward(sndr). - -Returns: e. - -Remarks: The exception specification is equivalent to noexcept(e). - -template - constexpr queryable decltype(auto) transform_env(Sndr&& sndr, Env&& env) noexcept; -Let e be the expression tag_of_t().transform_env(std::forward(sndr), std::forward(env)) if that expression is well-formed; otherwise, static_cast(std::forward(env)). - -Mandates: noexcept(e) is true. - -Returns: e. - -template - constexpr decltype(auto) apply_sender(Tag, Sndr&& sndr, Args&&... args) - noexcept(see below); -Let e be the expression Tag().apply_sender(std::forward(sndr), std::forward(args)...). - -Constraints: e is a well-formed expression. - -Returns: e. - -Remarks: The exception specification is equivalent to noexcept(e). - -34.9.5. execution::transform_sender [exec.snd.transform] -namespace std::execution { - template - requires (sizeof...(Env) <= 1) - constexpr sender decltype(auto) transform_sender(Domain dom, Sndr&& sndr, const Env&... env) - noexcept(see below); -} -Let transformed-sndr be the expression dom.transform_sender(std::forward(sndr), env...) 
if that expression is well-formed; otherwise, default_domain().transform_sender(std::forward(sndr), env...). Let final-sndr be the expression transformed-sndr if transformed-sndr and sndr have the same type ignoring cv qualifiers; otherwise, it is the expression transform_sender(dom, transformed-sndr, env...). - -Returns: final-sndr. - -Remarks: The exception specification is equivalent to noexcept(final-sndr). - -34.9.6. execution::transform_env [exec.snd.transform.env] -namespace std::execution { - template - constexpr queryable decltype(auto) transform_env(Domain dom, Sndr&& sndr, Env&& env) noexcept; -} -Let e be the expression dom.transform_env(std::forward(sndr), std::forward(env)) if that expression is well-formed; otherwise, default_domain().transform_env(std::forward(sndr), std::forward(env)). - -Mandates: noexcept(e) is true. - -Returns: e. - -34.9.7. execution::apply_sender [exec.snd.apply] -namespace std::execution { - template - constexpr decltype(auto) apply_sender(Domain dom, Tag, Sndr&& sndr, Args&&... args) - noexcept(see below); -} -Let e be the expression dom.apply_sender(Tag(), std::forward(sndr), std::forward(args)...) if that expression is well-formed; otherwise, default_domain().apply_sender(Tag(), std::forward(sndr), std::forward(args)...). - -Constraints: The expression e is well-formed. - -Returns: e. - -Remarks: The exception specification is equivalent to noexcept(e). - -34.9.8. execution::get_completion_signatures [exec.getcomplsigs] -get_completion_signatures is a customization point object. Let sndr be an expression such that decltype((sndr)) is Sndr, and let env be an expression such that decltype((env)) is Env. Let new_sndr be the expression transform_sender(decltype(get-domain-late(sndr, env)){}, sndr, env), and let NewSndr be decltype((new_sndr)). 
Then get_completion_signatures(sndr, env) is expression-equivalent to (void(sndr), void(env), CS()) except that void(sndr) and void(env) are indeterminately sequenced, where CS is: - -decltype(new_sndr.get_completion_signatures(env)) if that type is well-formed, - -Otherwise, remove_cvref_t::completion_signatures if that type is well-formed, - -Otherwise, if is-awaitable> is true, then: - -completion_signatures< - SET-VALUE-SIG(await-result-type>), // see [exec.snd.concepts] - set_error_t(exception_ptr), - set_stopped_t()> -Otherwise, CS is ill-formed. - -Let rcvr be an rvalue whose type Rcvr models receiver, and let Sndr be the type of a sender such that sender_in> is true. Let Sigs... be the template arguments of the completion_signatures specialization named by completion_signatures_of_t>. Let CSO be a completion function. If sender Sndr or its operation state cause the expression CSO(rcvr, args...) to be potentially evaluated ([basic.def.odr]) then there shall be a signature Sig in Sigs... such that MATCHING-SIG(decayed-typeof(decltype(args)...), Sig) is true ([exec.general]). - -34.9.9. execution::connect [exec.connect] -connect connects ([async.ops]) a sender with a receiver. - -The name connect denotes a customization point object. For subexpressions sndr and rcvr, let Sndr be decltype((sndr)) and Rcvr be decltype((rcvr)), let new_sndr be the expression transform_sender(decltype(get-domain-late(sndr, get_env(rcvr))){}, sndr, get_env(rcvr)), and let DS and DR be decay_t and decay_t, respectively. 
- -Let connect-awaitable-promise be the following exposition-only class: - -namespace std::execution { - struct connect-awaitable-promise - : with-await-transform { - - connect-awaitable-promise(DS&, DR& rcvr) noexcept : rcvr(rcvr) {} - - suspend_always initial_suspend() noexcept { return {}; } - [[noreturn]] suspend_always final_suspend() noexcept { terminate(); } - [[noreturn]] void unhandled_exception() noexcept { terminate(); } - [[noreturn]] void return_void() noexcept { terminate(); } - - coroutine_handle<> unhandled_stopped() noexcept { - set_stopped(std::move(rcvr)); - return noop_coroutine(); - } - - operation-state-task get_return_object() noexcept { - return operation-state-task{ - coroutine_handle::from_promise(*this)}; - } - - env_of_t get_env() const noexcept { - return execution::get_env(rcvr); - } - - private: - DR& rcvr; // exposition only - }; -} -Let operation-state-task be the following exposition-only class: - -namespace std::execution { - struct operation-state-task { - using operation_state_concept = operation_state_t; - using promise_type = connect-awaitable-promise; - - explicit operation-state-task(coroutine_handle<> h) noexcept : coro(h) {} - operation-state-task(operation-state-task&& o) noexcept - : coro(exchange(o.coro, {})) {} - ~operation-state-task() { if (coro) coro.destroy(); } - - void start() & noexcept { - coro.resume(); - } - - private: - coroutine_handle<> coro; // exposition only - }; -} -Let V name the type await-result-type, let Sigs name the type: - -completion_signatures< - SET-VALUE-SIG(V), // see [exec.snd.concepts] - set_error_t(exception_ptr), - set_stopped_t()> -and let connect-awaitable be an exposition-only coroutine defined as follows: - -namespace std::execution { - template - auto suspend-complete(Fun fun, Ts&&... 
as) noexcept { // exposition only - auto fn = [&, fun]() noexcept { fun(std::forward(as)...); }; - - struct awaiter { - decltype(fn) fn; - - static constexpr bool await_ready() noexcept { return false; } - void await_suspend(coroutine_handle<>) noexcept { fn(); } - [[noreturn]] void await_resume() noexcept { unreachable(); } - }; - return awaiter{fn}; - } - - operation-state-task connect-awaitable(DS sndr, DR rcvr) requires receiver_of { - exception_ptr ep; - try { - if constexpr (same_as) { - co_await std::move(sndr); - co_await suspend-complete(set_value, std::move(rcvr)); - } else { - co_await suspend-complete(set_value, std::move(rcvr), co_await std::move(sndr)); - } - } catch(...) { - ep = current_exception(); - } - co_await suspend-complete(set_error, std::move(rcvr), std::move(ep)); - } -} -The expression connect(sndr, rcvr) is expression-equivalent to: - -new_sndr.connect(rcvr) if that expression is well-formed. - -Mandates: The type of the expression above satisfies operation_state. - -Otherwise, connect-awaitable(new_sndr, rcvr). - -Mandates: sender && receiver is true. - -34.9.10. Sender factories [exec.factories] -34.9.10.1. execution::schedule [exec.schedule] -schedule obtains a schedule sender ([async.ops]) from a scheduler. - -The name schedule denotes a customization point object. For a subexpression sch, the expression schedule(sch) is expression-equivalent to sch.schedule(). - -If the expression get_completion_scheduler( get_env(sch.schedule())) == sch is ill-formed or evaluates to false, the behavior of calling schedule(sch) is undefined. - -Mandates: The type of sch.schedule() satisfies sender. - -34.9.10.2. execution::just, execution::just_error, execution::just_stopped [exec.just] -just, just_error, and just_stopped are sender factories whose asynchronous operations complete synchronously in their start operation with a value completion operation, an error completion operation, or a stopped completion operation respectively. 
- -The names just, just_error, and just_stopped denote customization point objects. Let just-cpo be one of just, just_error, or just_stopped. For a pack of subexpressions ts, let Ts be the pack of types decltype((ts)). The expression just-cpo(ts...) is ill-formed if: - -(movable-value &&...) is false, or - -just-cpo is just_error and sizeof...(ts) == 1 is false, or - -just-cpo is just_stopped and sizeof...(ts) == 0 is false; - -Otherwise, it is expression-equivalent to make-sender(just-cpo, product-type{ts...}). - -For just, just_error, and just_stopped, let set-cpo be set_value, set_error, and set_stopped respectively. The exposition-only class template impls-for ([exec.snd.general]) is specialized for just-cpo as follows: - -namespace std::execution { - template<> - struct impls-for> : default-impls { - static constexpr auto start = - [](auto& state, auto& rcvr) noexcept -> void { - auto& [...ts] = state; - set-cpo(std::move(rcvr), std::move(ts)...); - }; - }; -} -34.9.10.3. execution::read_env [exec.read.env] -read_env is a sender factory for a sender whose asynchronous operation completes synchronously in its start operation with a value completion result equal to a value read from the receiver’s associated environment. - -read_env is a customization point object. For some query object q, the expression read_env(q) is expression-equivalent to make-sender(read_env, q). - -The exposition-only class template impls-for ([exec.snd.general]) is specialized for read_env as follows: - -namespace std::execution { - template<> - struct impls-for> : default-impls { - static constexpr auto start = - [](auto query, auto& rcvr) noexcept -> void { - TRY-SET-VALUE(rcvr, query(get_env(rcvr))); - }; - }; -} -34.9.11. Sender adaptors [exec.adapt] -34.9.11.1. General [exec.adapt.general] -[exec.adapt] specifies a set of sender adaptors. - -The bitwise inclusive OR operator is overloaded for the purpose of creating sender chains. 
The adaptors also support function call syntax with equivalent semantics. - -Unless otherwise specified: - -A sender adaptor is prohibited from causing observable effects, apart from moving and copying its arguments, before the returned sender is connected with a receiver using connect, and start is called on the resulting operation state. - -A parent sender ([async.ops]) with a single child sender sndr has an associated attribute object equal to FWD-ENV(get_env(sndr)) ([exec.fwd.env]). - -A parent sender with more than one child sender has an associated attributes object equal to empty_env{}. - -When a parent sender is connected to a receiver rcvr, any receiver used to connect a child sender has an associated environment equal to FWD-ENV(get_env(rcvr)). - -These requirements apply to any function that is selected by the implementation of the sender adaptor. - -If a sender returned from a sender adaptor specified in [exec.adapt] is specified to include set_error_t(Err) among its set of completion signatures where decay_t denotes the type exception_ptr, but the implementation does not potentially evaluate an error completion operation with an exception_ptr argument, the implementation is allowed to omit the exception_ptr error completion signature from the set. - -34.9.11.2. Sender adaptor closure objects [exec.adapt.objects] -A pipeable sender adaptor closure object is a function object that accepts one or more sender arguments and returns a sender. For a pipeable sender adaptor closure object c and an expression sndr such that decltype((sndr)) models sender, the following expressions are equivalent and yield a sender: - -c(sndr) -sndr | c -Given an additional pipeable sender adaptor closure object d, the expression c | d produces another pipeable sender adaptor closure object e: - -e is a perfect forwarding call wrapper ([func.require]) with the following properties: - -Its target object is an object d2 of type decltype(auto(d)) direct-non-list-initialized with d. 
- -It has one bound argument entity, an object c2 of type decltype(auto(c)) direct-non-list-initialized with c. - -Its call pattern is d2(c2(arg)), where arg is the argument used in a function call expression of e. - -The expression c | d is well-formed if and only if the initializations of the state entities ([func.def]) of e are all well-formed. - -An object t of type T is a pipeable sender adaptor closure object if T models derived_from>, T has no other base classes of type sender_adaptor_closure for any other type U, and T does not satisfy sender. - -The template parameter D for sender_adaptor_closure can be an incomplete type. Before any expression of type cv D appears as an operand to the | operator, D shall be complete and model derived_from>. The behavior of an expression involving an object of type cv D as an operand to the | operator is undefined if overload resolution selects a program-defined operator| function. - -A pipeable sender adaptor object is a customization point object that accepts a sender as its first argument and returns a sender. - -If a pipeable sender adaptor object accepts only one argument, then it is a pipeable sender adaptor closure object. - -If a pipeable sender adaptor object adaptor accepts more than one argument, then let sndr be an expression such that decltype((sndr)) models sender, let args... be arguments such that adaptor(sndr, args...) is a well-formed expression as specified below, and let BoundArgs be a pack that denotes decltype(auto(args)).... The expression adaptor(args...) produces a pipeable sender adaptor closure object f that is a perfect forwarding call wrapper with the following properties: - -Its target object is a copy of adaptor. - -Its bound argument entities bound_args consist of objects of types BoundArgs... direct-non-list-initialized with std::forward(args)..., respectively. - -Its call pattern is adaptor(rcvr, bound_args...), where rcvr is the argument used in a function call expression of f. 
- -The expression adaptor(args...) is well-formed if and only if the initializations of the bound argument entities of the result, as specified above, are all well-formed. - -34.9.11.3. execution::starts_on [exec.starts.on] -starts_on adapts an input sender into a sender that will start on an execution agent belonging to a particular scheduler’s associated execution resource. - -The name starts_on denotes a customization point object. For subexpressions sch and sndr, if decltype((sch)) does not satisfy scheduler, or decltype((sndr)) does not satisfy sender, starts_on(sch, sndr) is ill-formed. - -Otherwise, the expression starts_on(sch, sndr) is expression-equivalent to: - -transform_sender( - query-or-default(get_domain, sch, default_domain()), - make-sender(starts_on, sch, sndr)) -except that sch is evaluated only once. - -Let out_sndr and env be subexpressions such that OutSndr is decltype((out_sndr)). If sender-for is false, then the expressions starts_on.transform_env(out_sndr, env) and starts_on.transform_sender(out_sndr, env) are ill-formed; otherwise: - -starts_on.transform_env(out_sndr, env) is equivalent to: - -auto&& [_, sch, _] = out_sndr; -return JOIN-ENV(SCHED-ENV(sch), FWD-ENV(env)); -starts_on.transform_sender(out_sndr, env) is equivalent to: - -auto&& [_, sch, sndr] = out_sndr; -return let_value( - schedule(sch), - [sndr = std::forward_like(sndr)]() mutable - noexcept(is_nothrow_move_constructible_v) { - return std::move(sndr); - }); -Let out_sndr be a subexpression denoting a sender returned from starts_on(sch, sndr) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr with out_rcvr. Calling start(op) shall start sndr on an execution agent of the associated execution resource of sch. 
If scheduling onto sch fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -34.9.11.4. execution::continues_on [exec.continues.on] -continues_on adapts a sender into one that completes on the specified scheduler. - -The name continues_on denotes a pipeable sender adaptor object. For subexpressions sch and sndr, if decltype((sch)) does not satisfy scheduler, or decltype((sndr)) does not satisfy sender, continues_on(sndr, sch) is ill-formed. - -Otherwise, the expression continues_on(sndr, sch) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(continues_on, sch, sndr)) -except that sndr is evaluated only once. - -The exposition-only class template impls-for is specialized for continues_on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get_attrs = - [](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); - }; - }; -} -Let sndr and env be subexpressions such that Sndr is decltype((sndr)). If sender-for is false, then the expression continues_on.transform_sender(sndr, env) is ill-formed; otherwise, it is equal to: - -auto [_, data, child] = sndr; -return schedule_from(std::move(data), std::move(child)); -This causes the continues_on(sndr, sch) sender to become schedule_from(sch, sndr) when it is connected with a receiver whose execution domain does not customize continues_on. - -Let out_sndr be a subexpression denoting a sender returned from continues_on(sndr, sch) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr with out_rcvr. 
Calling start(op) shall start sndr on the current execution agent and execute completion operations on out_rcvr on an execution agent of the execution resource associated with sch. If scheduling onto sch fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -34.9.11.5. execution::schedule_from [exec.schedule.from] -schedule_from schedules work dependent on the completion of a sender onto a scheduler’s associated execution resource. schedule_from is not meant to be used in user code; it is used in the implementation of continues_on. - -The name schedule_from denotes a customization point object. For some subexpressions sch and sndr, let Sch be decltype((sch)) and Sndr be decltype((sndr)). If Sch does not satisfy scheduler, or Sndr does not satisfy sender, schedule_from(sch, sndr) is ill-formed. - -Otherwise, the expression schedule_from(sch, sndr) is expression-equivalent to: - -transform_sender( - query-or-default(get_domain, sch, default_domain()), - make-sender(schedule_from, sch, sndr)) -except that sch is evaluated only once. 
- -The exposition-only class template impls-for ([exec.snd.general]) is specialized for schedule_from_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - }; -} -The member impls-for::get-attrs is initialized with a callable object equivalent to the following lambda: - -[](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); -} -The member impls-for::get-state is initialized with a callable object equivalent to the following lambda: - -[](Sndr&& sndr, Rcvr& rcvr) noexcept(see below) - requires sender_in, env_of_t> { - - auto& [_, sch, child] = sndr; - - using sched_t = decltype(auto(sch)); - using variant_t = see below; - using receiver_t = see below; - using operation_t = connect_result_t, receiver_t>; - constexpr bool nothrow = noexcept(connect(schedule(sch), receiver_t{nullptr})); - - struct state-type { - Rcvr& rcvr; // exposition only - variant_t async-result; // exposition only - operation_t op-state; // exposition only - - explicit state-type(sched_t sch, Rcvr& rcvr) noexcept(nothrow) - : rcvr(rcvr), op-state(connect(schedule(sch), receiver_t{this})) {} - }; - - return state-type{sch, rcvr}; -} -Objects of the local class state-type can be used to initialize a structured binding. - -Let Sigs be a pack of the arguments to the completion_signatures specialization named by completion_signatures_of_t, env_of_t>. Let as-tuple be an alias template that transforms a completion signature Tag(Args...) into the tuple specialization decayed-tuple. Then variant_t denotes the type variant...>, except with duplicate types removed. 
- -receiver_t is an alias for the following exposition-only class: - -namespace std::execution { - struct receiver-type { - using receiver_concept = receiver_t; - state-type* state; // exposition only - - void set_value() && noexcept { - visit( - [this](Tuple& result) noexcept -> void { - if constexpr (!same_as) { - auto& [tag, ...args] = result; - tag(std::move(state->rcvr), std::move(args)...); - } - }, - state->async-result); - } - - template - void set_error(Error&& err) && noexcept { - execution::set_error(std::move(state->rcvr), std::forward(err)); - } - - void set_stopped() && noexcept { - execution::set_stopped(std::move(state->rcvr)); - } - - decltype(auto) get_env() const noexcept { - return FWD-ENV(execution::get_env(state->rcvr)); - } - }; -} -The expression in the noexcept clause of the lambda is true if the construction of the returned state-type object is not potentially throwing; otherwise, false. - -The member impls-for::complete is initialized with a callable object equivalent to the following lambda: - -[](auto, auto& state, auto& rcvr, Tag, Args&&... args) noexcept -> void { - using result_t = decayed-tuple; - constexpr bool nothrow = is_nothrow_constructible_v; - - TRY-EVAL(rcvr, [&]() noexcept(nothrow) { - state.async-result.template emplace(Tag(), std::forward(args)...); - }()); - - if (state.async-result.valueless_by_exception()) - return; - if (state.async-result.index() == 0) - return; - - start(state.op-state); -}; -Let out_sndr be a subexpression denoting a sender returned from schedule_from(sch, sndr) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr with out_rcvr. 
Calling start(op) shall start sndr on the current execution agent and execute completion operations on out_rcvr on an execution agent of the execution resource associated with sch. If scheduling onto sch fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -34.9.11.6. execution::on [exec.on] -The on sender adaptor has two forms: - -on(sch, sndr), which starts a sender sndr on an execution agent belonging to a scheduler sch's associated execution resource and that, upon sndr's completion, transfers execution back to the execution resource on which the on sender was started. - -on(sndr, sch, closure), which upon completion of a sender sndr, transfers execution to an execution agent belonging to a scheduler sch's associated execution resource, then executes a sender adaptor closure closure with the async results of the sender, and that then transfers execution back to the execution resource on which sndr completed. - -The name on denotes a pipeable sender adaptor object. For subexpressions sch and sndr, on(sch, sndr) is ill-formed if any of the following is true: - -decltype((sch)) does not satisfy scheduler, or - -decltype((sndr)) does not satisfy sender and sndr is not a pipeable sender adaptor closure object ([exec.adapt.objects]), or - -decltype((sndr)) satisfies sender and sndr is also a pipeable sender adaptor closure object. - -Otherwise, if decltype((sndr)) satisfies sender, the expression on(sch, sndr) is expression-equivalent to: - -transform_sender( - query-or-default(get_domain, sch, default_domain()), - make-sender(on, sch, sndr)) -except that sch is evaluated only once. 
- -For subexpressions sndr, sch, and closure, if decltype((sch)) does not satisfy scheduler, or decltype((sndr)) does not satisfy sender, or closure is not a pipeable sender adaptor closure object ([exec.adapt.objects]), the expression on(sndr, sch, closure) is ill-formed; otherwise, it is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(on, product-type{sch, closure}, sndr)) -except that sndr is evaluated only once. - -Let out_sndr and env be subexpressions, let OutSndr be decltype((out_sndr)), and let Env be decltype((env)). If sender-for is false, then the expressions on.transform_env(out_sndr, env) and on.transform_sender(out_sndr, env) are ill-formed; otherwise: - -Let not-a-scheduler be an unspecified empty class type, and let not-a-sender be the exposition-only type: - -struct not-a-sender { - using sender_concept = sender_t; - - auto get_completion_signatures(auto&&) const { - return see below; - } -}; -where the member function get_completion_signatures returns an object of a type that is not a specialization of the completion_signatures class template. 
- -The expression on.transform_env(out_sndr, env) has effects equivalent to: - -auto&& [_, data, _] = out_sndr; -if constexpr (scheduler) { - return JOIN-ENV(SCHED-ENV(std::forward_like(data)), FWD-ENV(std::forward(env))); -} else { - return std::forward(env); -} -The expression on.transform_sender(out_sndr, env) has effects equivalent to: - -auto&& [_, data, child] = out_sndr; -if constexpr (scheduler) { - auto orig_sch = - query-with-default(get_scheduler, env, not-a-scheduler()); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return continues_on( - starts_on(std::forward_like(data), std::forward_like(child)), - std::move(orig_sch)); - } -} else { - auto& [sch, closure] = data; - auto orig_sch = query-with-default( - get_completion_scheduler, - get_env(child), - query-with-default(get_scheduler, env, not-a-scheduler())); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return write-env( - continues_on( - std::forward_like(closure)( - continues_on( - write-env(std::forward_like(child), SCHED-ENV(orig_sch)), - sch)), - orig_sch), - SCHED-ENV(sch)); - } -} -Recommended practice: Implementations should use the return type of not-a-sender::get_completion_signatures to inform users that their usage of on is incorrect because there is no available scheduler onto which to restore execution. - -Let out_sndr be a subexpression denoting a sender returned from on(sch, sndr) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr with out_rcvr. Calling start(op) shall: - -Remember the current scheduler, get_scheduler(get_env(rcvr)). - -Start sndr on an execution agent belonging to sch's associated execution resource. 
- -Upon sndr's completion, transfer execution back to the execution resource associated with the scheduler remembered in step 1. - -Forward sndr's async result to out_rcvr. - -If any scheduling operation fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -Let out_sndr be a subexpression denoting a sender returned from on(sndr, sch, closure) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr with out_rcvr. Calling start(op) shall: - -Remember the current scheduler, which is the first of the following expressions that is well-formed: - -get_completion_scheduler(get_env(sndr)) - -get_scheduler(get_env(rcvr)) - -Start sndr on the current execution agent. - -Upon sndr's completion, transfer execution to an agent owned by sch's associated execution resource. - -Forward sndr's async result as if by connecting and starting a sender closure(S), where S is a sender that completes synchronously with sndr's async result. - -Upon completion of the operation started in step 4, transfer execution back to the execution resource associated with the scheduler remembered in step 1 and forward the operation’s async result to out_rcvr. - -If any scheduling operation fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -34.9.11.7. execution::then, execution::upon_error, execution::upon_stopped [exec.then] -then attaches an invocable as a continuation for an input sender’s value completion operation. upon_error and upon_stopped do the same for the error and stopped completion operations respectively, sending the result of the invocable as a value completion. - -The names then, upon_error, and upon_stopped denote pipeable sender adaptor objects. 
Let the expression then-cpo be one of then, upon_error, or upon_stopped. For subexpressions sndr and f, if decltype((sndr)) does not satisfy sender, or decltype((f)) does not satisfy movable-value, then-cpo(sndr, f) is ill-formed. - -Otherwise, the expression then-cpo(sndr, f) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(then-cpo, f, sndr)) -except that sndr is evaluated only once. - -For then, upon_error, and upon_stopped, let set-cpo be set_value, set_error, and set_stopped respectively. The exposition-only class template impls-for ([exec.snd.general]) is specialized for then-cpo as follows: - -namespace std::execution { - template<> - struct impls-for<decayed-typeof<then-cpo>> : default-impls { - static constexpr auto complete = - []<class Tag, class... Args> - (auto, auto& fn, auto& rcvr, Tag, Args&&... args) noexcept -> void { - if constexpr (same_as<Tag, decayed-typeof<set-cpo>>) { - TRY-SET-VALUE(rcvr, - invoke(std::move(fn), std::forward<Args>(args)...)); - } else { - Tag()(std::move(rcvr), std::forward<Args>(args)...); - } - }; - }; -} -The expression then-cpo(sndr, f) has undefined behavior unless it returns a sender out_sndr that: - -Invokes f or a copy of such with the value, error, or stopped result datums of sndr for then, upon_error, and upon_stopped respectively, using the result value of f as out_sndr's value completion, and - -Forwards all other completion operations unchanged. - -34.9.11.8. execution::let_value, execution::let_error, execution::let_stopped, [exec.let] -let_value, let_error, and let_stopped transform a sender’s value, error, and stopped completions respectively into a new child asynchronous operation by passing the sender’s result datums to a user-specified callable, which returns a new sender that is connected and started. - -For let_value, let_error, and let_stopped, let set-cpo be set_value, set_error, and set_stopped respectively. Let the expression let-cpo be one of let_value, let_error, or let_stopped. 
For a subexpression sndr, let let-env(sndr) be expression-equivalent to the first well-formed expression below: - -SCHED-ENV(get_completion_scheduler>(get_env(sndr))) - -MAKE-ENV(get_domain, get_domain(get_env(sndr))) - -(void(sndr), empty_env{}) - -The names let_value, let_error, and let_stopped denote pipeable sender adaptor objects. For subexpressions sndr and f, let F be the decayed type of f. If decltype((sndr)) does not satisfy sender or if decltype((f)) does not satisfy movable-value, the expression let-cpo(sndr, f) is ill-formed. If F does not satisfy invocable, the expression let_stopped(sndr, f) is ill-formed. - -Otherwise, the expression let-cpo(sndr, f) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(let-cpo, f, sndr)) -except that sndr is evaluated only once. - -The exposition-only class template impls-for ([exec.snd.general]) is specialized for let-cpo as follows: - -namespace std::execution { - template - void let-bind(State& state, Rcvr& rcvr, Args&&... args); // exposition only - - template<> - struct impls-for> : default-impls { - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - }; -} -Let receiver2 denote the following exposition-only class template: - -namespace std::execution { - template - struct receiver2 { - using receiver_concept = receiver_t; - - template - void set_value(Args&&... 
args) && noexcept { - execution::set_value(std::move(rcvr), std::forward(args)...); - } - - template - void set_error(Error&& err) && noexcept { - execution::set_error(std::move(rcvr), std::forward(err)); - } - - void set_stopped() && noexcept { - execution::set_stopped(std::move(rcvr)); - } - - decltype(auto) get_env() const noexcept { - return JOIN-ENV(env, FWD-ENV(execution::get_env(rcvr))); - } - - Rcvr& rcvr; // exposition only - Env env; // exposition only - }; -} -impls-for>::get-state is initialized with a callable object equivalent to the following: - -[](Sndr&& sndr, Rcvr& rcvr) requires see below { - auto& [_, fn, child] = sndr; - using fn_t = decay_t; - using env_t = decltype(let-env(child)); - using args_variant_t = see below; - using ops2_variant_t = see below; - - struct state-type { - fn_t fn; // exposition only - env_t env; // exposition only - args_variant_t args; // exposition only - ops2_variant_t ops2; // exposition only - }; - return state-type{std::forward_like(fn), let-env(child), {}, {}}; -} -Let Sigs be a pack of the arguments to the completion_signatures specialization named by completion_signatures_of_t, env_of_t>. Let LetSigs be a pack of those types in Sigs with a return type of decayed-typeof. Let as-tuple be an alias template such that as-tuple denotes the type decayed-tuple. Then args_variant_t denotes the type variant...> except with duplicate types removed. - -Given a type Tag and a pack Args, let as-sndr2 be an alias template such that as-sndr2 denotes the type call-result-t&...>. Then ops2_variant_t denotes the type variant, receiver2>...> except with duplicate types removed. - -The requires-clause constraining the above lambda is satisfied if and only if the types args_variant_t and ops2_variant_t are well-formed. 
- -The exposition-only function template let-bind has effects equivalent to: - -using args_t = decayed-tuple; -auto mkop2 = [&] { - return connect( - apply(std::move(state.fn), - state.args.template emplace(std::forward(args)...)), - receiver2{rcvr, std::move(state.env)}); -}; -start(state.ops2.template emplace(emplace-from{mkop2})); -impls-for>::complete is initialized with a callable object equivalent to the following: - -[] - (auto, auto& state, auto& rcvr, Tag, Args&&... args) noexcept -> void { - if constexpr (same_as>) { - TRY-EVAL(rcvr, let-bind(state, rcvr, std::forward(args)...)); - } else { - Tag()(std::move(rcvr), std::forward(args)...); - } - } -Let sndr and env be subexpressions, and let Sndr be decltype((sndr)). If sender-for> is false, then the expression let-cpo.transform_env(sndr, env) is ill-formed. Otherwise, it is equal to JOIN-ENV(let-env(sndr), FWD-ENV(env)). - -Let the subexpression out_sndr denote the result of the invocation let-cpo(sndr, f) or an object equal to such, and let the subexpression rcvr denote a receiver such that the expression connect(out_sndr, rcvr) is well-formed. The expression connect(out_sndr, rcvr) has undefined behavior unless it creates an asynchronous operation ([async.ops]) that, when started: - -invokes f when set-cpo is called with sndr's result datums, - -makes its completion dependent on the completion of a sender returned by f, and - -propagates the other completion operations sent by sndr. - -34.9.11.9. execution::bulk [exec.bulk] -bulk runs a task repeatedly for every index in an index space. - -The name bulk denotes a pipeable sender adaptor object. For subexpressions sndr, shape, and f, let Shape be decltype(auto(shape)). If decltype((sndr)) does not satisfy sender, or if Shape does not satisfy integral, or if decltype((f)) does not satisfy movable-value, bulk(sndr, shape, f) is ill-formed. 
- -Otherwise, the expression bulk(sndr, shape, f) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(bulk, product-type{shape, f}, sndr)) -except that sndr is evaluated only once. - -The exposition-only class template impls-for ([exec.snd.general]) is specialized for bulk_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto complete = see below; - }; -} -The member impls-for::complete is initialized with a callable object equivalent to the following lambda: - -[] - (Index, State& state, Rcvr& rcvr, Tag, Args&&... args) noexcept -> void requires see below { - if constexpr (same_as) { - auto& [shape, f] = state; - constexpr bool nothrow = noexcept(f(auto(shape), args...)); - TRY-EVAL(rcvr, [&]() noexcept(nothrow) { - for (decltype(auto(shape)) i = 0; i < shape; ++i) { - f(auto(i), args...); - } - Tag()(std::move(rcvr), std::forward(args)...); - }()); - } else { - Tag()(std::move(rcvr), std::forward(args)...); - } - } -The expression in the requires-clause of the lambda above is true if and only if Tag denotes a type other than set_value_t or if the expression f(auto(shape), args...) is well-formed. - -Let the subexpression out_sndr denote the result of the invocation bulk(sndr, shape, f) or an object equal to such, and let the subexpression rcvr denote a receiver such that the expression connect(out_sndr, rcvr) is well-formed. The expression connect(out_sndr, rcvr) has undefined behavior unless it creates an asynchronous operation ([async.ops]) that, when started: - -on a value completion operation, invokes f(i, args...) for every i of type Shape from 0 to shape, where args is a pack of lvalue subexpressions referring to the value completion result datums of the input sender, and - -propagates all completion operations sent by sndr. - -34.9.11.10. execution::split [exec.split] -split adapts an arbitrary sender into a sender that can be connected multiple times. 
- -Let split-env be the type of an environment such that, given an instance env, the expression get_stop_token(env) is well-formed and has type inplace_stop_token. - -The name split denotes a pipeable sender adaptor object. For a subexpression sndr, let Sndr be decltype((sndr)). If sender_in is false, split(sndr) is ill-formed. - -Otherwise, the expression split(sndr) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(split, {}, sndr)) -except that sndr is evaluated only once. - -The default implementation of transform_sender will have the effect of connecting the sender to a receiver. It will return a sender with a different tag type. - -Let local-state denote the following exposition-only class template: - -namespace std::execution { - struct local-state-base { // exposition only - virtual ~local-state-base() = default; - virtual void notify() noexcept = 0; // exposition only - }; - - template - struct local-state : local-state-base { // exposition only - using on-stop-callback = // exposition only - stop_callback_of_t>, on-stop-request>; - - local-state(Sndr&& sndr, Rcvr& rcvr) noexcept; - ~local-state(); - - void notify() noexcept override; - - private: - optional on_stop; // exposition only - shared-state* sh_state; // exposition only - Rcvr* rcvr; // exposition only - }; -} -local-state(Sndr&& sndr, Rcvr& rcvr) noexcept; -Effects: Equivalent to: - -auto& [_, data, _] = sndr; -this->sh_state = data.sh_state.get(); -this->sh_state->inc-ref(); -this->rcvr = addressof(rcvr); -~local-state(); -Effects: Equivalent to: - -sh_state->dec-ref(); -void notify() noexcept override; -Effects: Equivalent to: - -on_stop.reset(); -visit( - [this](const auto& tupl) noexcept -> void { - apply( - [this](auto tag, const auto&... 
args) noexcept -> void { - tag(std::move(*rcvr), args...); - }, - tupl); - }, - sh_state->result); -Let split-receiver denote the following exposition-only class template: - -namespace std::execution { - template - struct split-receiver { - using receiver_concept = receiver_t; - - template - void complete(Tag, Args&&... args) noexcept { // exposition only - using tuple_t = decayed-tuple; - try { - sh_state->result.template emplace(Tag(), std::forward(args)...); - } catch (...) { - using tuple_t = tuple; - sh_state->result.template emplace(set_error, current_exception()); - } - sh_state->notify(); - } - - template - void set_value(Args&&... args) && noexcept { - complete(execution::set_value, std::forward(args)...); - } - - template - void set_error(Error&& err) && noexcept { - complete(execution::set_error, std::forward(err)); - } - - void set_stopped() && noexcept { - complete(execution::set_stopped); - } - - struct env { // exposition only - shared-state* sh-state; // exposition only - - inplace_stop_token query(get_stop_token_t) const noexcept { - return sh-state->stop_src.get_token(); - } - }; - - env get_env() const noexcept { - return env{sh_state}; - } - - shared-state* sh_state; // exposition only - }; -} -Let shared-state denote the following exposition-only class template: - -namespace std::execution { - template - struct shared-state { - using variant-type = see below; // exposition only - using state-list-type = see below; // exposition only - - explicit shared-state(Sndr&& sndr); - - void start-op() noexcept; // exposition only - void notify() noexcept; // exposition only - void inc-ref() noexcept; // exposition only - void dec-ref() noexcept; // exposition only - - inplace_stop_source stop_src{}; // exposition only - variant-type result{}; // exposition only - state-list-type waiting_states; // exposition only - atomic completed{false}; // exposition only - atomic ref_count{1}; // exposition only - connect_result_t> op_state; // exposition only - }; 
-} -Let Sigs be a pack of the arguments to the completion_signatures specialization named by completion_signatures_of_t. For type Tag and pack Args, let as-tuple be an alias template such that as-tuple denotes the type decayed-tuple. Then variant-type denotes the type variant, tuple, as-tuple...>, but with duplicate types removed. - -Let state-list-type be a type that stores a list of pointers to local-state-base objects and that permits atomic insertion. - - explicit shared-state(Sndr&& sndr); -Effects: Initializes op_state with the result of connect(std::forward(sndr), split-receiver{this}). - -Postcondition: waiting_states is empty, and completed is false. - - void start-op() noexcept; -Effects: Calls inc-ref(). If stop_src.stop_requested() is true, calls notify(); otherwise, calls start(op_state). - - void notify() noexcept; -Effects: Atomically does the following: - -Sets completed to true, and - -Exchanges waiting_states with an empty list, storing the old value in a local prior_states. - -Then, for each pointer p in prior_states, calls p->notify(). Finally, calls dec-ref(). - - void inc-ref() noexcept; -Effects: Increments ref_count. - - void dec-ref() noexcept; -Effects: Decrements ref_count. If the new value of ref_count is 0, calls delete this. - -Synchronization: If dec-ref() does not decrement the ref_count to 0 then synchronizes with the call to dec-ref() that decrements ref_count to 0. - -Let split-impl-tag be an empty exposition-only class type. Given an expression sndr, the expression split.transform_sender(sndr) is equivalent to: - -auto&& [tag, _, child] = sndr; -auto* sh_state = new shared-state{std::forward_like(child)}; -return make-sender(split-impl-tag(), shared-wrapper{sh_state, tag}); -where shared-wrapper is an exposition-only class that manages the reference count of the shared-state object pointed to by sh_state. 
shared-wrapper models copyable with move operations nulling out the moved-from object, copy operations incrementing the reference count by calling sh_state->inc-ref(), and assignment operations performing a copy-and-swap operation. The destructor has no effect if sh_state is null; otherwise, it decrements the reference count by calling sh_state->dec-ref(). - -The exposition-only class template impls-for ([exec.snd.general]) is specialized for split-impl-tag as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-state = see below; - static constexpr auto start = see below; - }; -} -The member impls-for::get-state is initialized with a callable object equivalent to the following lambda expression: - -[](Sndr&& sndr, auto& rcvr) noexcept { - return local-state{std::forward(sndr), rcvr}; -} -The member impls-for::start is initialized with a callable object that has a function call operator equivalent to the following: - -template -void operator()(local-state& state, Rcvr& rcvr) const noexcept; -Effects: If state.sh_state->completed is true, calls state.notify() and returns. Otherwise, does the following in order: - -Calls: - -state.on_stop.emplace( - get_stop_token(get_env(rcvr)), - on-stop-request{state.sh_state->stop_src}); -Then atomically does the following: - -Reads the value c of state.sh_state->completed, and - -Inserts addressof(state) into state.sh_state->waiting_states if c is false. - -If c is true, calls state.notify() and returns. - -Otherwise, if addressof(state) is the first item added to state.sh_state->waiting_states, calls state.sh_state->start-op(). - -34.9.11.11. execution::when_all [exec.when.all] -when_all and when_all_with_variant both adapt multiple input senders into a sender that completes when all input senders have completed. 
when_all only accepts senders with a single value completion signature and on success concatenates all the input senders' value result datums into its own value completion operation. when_all_with_variant(sndrs...) is semantically equivalent to when_all(into_variant(sndrs)...), where sndrs is a pack of subexpressions whose types model sender. - -The names when_all and when_all_with_variant denote customization point objects. Let sndrs be a pack of subexpressions, let Sndrs be a pack of the types decltype((sndrs))..., and let CD be the type common_type_t. The expressions when_all(sndrs...) and when_all_with_variant(sndrs...) are ill-formed if any of the following is true: - -sizeof...(sndrs) is 0, or - -(sender && ...) is false, or - -CD is ill-formed. - -The expression when_all(sndrs...) is expression-equivalent to: - -transform_sender( - CD(), - make-sender(when_all, {}, sndrs...)) -The exposition-only class template impls-for ([exec.snd.general]) is specialized for when_all_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-env = see below; - static constexpr auto get-state = see below; - static constexpr auto start = see below; - static constexpr auto complete = see below; - }; -} -The member impls-for::get-attrs is initialized with a callable object equivalent to the following lambda expression: - -[](auto&&, auto&&... 
child) noexcept { - if constexpr (same_as) { - return empty_env(); - } else { - return MAKE-ENV(get_domain, CD()); - } -} -The member impls-for::get-env is initialized with a callable object equivalent to the following lambda expression: - -[](auto&&, State& state, const Receiver& rcvr) noexcept { - return JOIN-ENV( - MAKE-ENV(get_stop_token, state.stop_src.get_token()), get_env(rcvr)); -} -The member impls-for::get-state is initialized with a callable object equivalent to the following lambda expression: - -[](Sndr&& sndr, Rcvr& rcvr) noexcept(e) -> decltype(e) { - return e; -} -where e is the expression: - -std::forward(sndr).apply(make-state()) -and where make-state is the following exposition-only class template: - -template -concept max-1-sender-in = sender_in && // exposition only - (tuple_size_v> <= 1); - -enum class disposition { started, error, stopped }; // exposition only - -template -struct make-state { - template>... Sndrs> - auto operator()(auto, auto, Sndrs&&... sndrs) const { - using values_tuple = see below; - using errors_variant = see below; - using stop_callback = stop_callback_of_t>, on-stop-request>; - - struct state-type { - void arrive(Rcvr& rcvr) noexcept { - if (0 == --count) { - complete(rcvr); - } - } - - void complete(Rcvr& rcvr) noexcept; // see below - - atomic count{sizeof...(sndrs)}; // exposition only - inplace_stop_source stop_src{}; // exposition only - atomic disp{disposition::started}; // exposition only - errors_variant errors{}; // exposition only - values_tuple values{}; // exposition only - optional on_stop{nullopt}; // exposition only - }; - - return state-type{}; - } -}; -Let copy-fail be exception_ptr if decay-copying any of the child senders' result datums can potentially throw; otherwise, none-such, where none-such is an unspecified empty class type. - -The alias values_tuple denotes the type tuple, decayed-tuple, optional>...> if that type is well-formed; otherwise, tuple<>. 
- -The alias errors_variant denotes the type variant with duplicate types removed, where Es is the pack of the decayed types of all the child senders' possible error result datums. - -The member void state::complete(Rcvr& rcvr) noexcept behaves as follows: - -If disp is equal to disposition::started, evaluates: - -auto tie = [](tuple& t) noexcept { return tuple(t); }; -auto set = [&](auto&... t) noexcept { set_value(std::move(rcvr), std::move(t)...); }; - -on_stop.reset(); -apply( - [&](auto&... opts) noexcept { - apply(set, tuple_cat(tie(*opts)...)); - }, - values); -Otherwise, if disp is equal to disposition::error, evaluates: - -on_stop.reset(); -visit( - [&](Error& error) noexcept { - if constexpr (!same_as) { - set_error(std::move(rcvr), std::move(error)); - } - }, - errors); -Otherwise, evaluates: - -on_stop.reset(); -set_stopped(std::move(rcvr)); -The member impls-for::start is initialized with a callable object equivalent to the following lambda expression: - -[]( - State& state, Rcvr& rcvr, Ops&... ops) noexcept -> void { - state.on_stop.emplace( - get_stop_token(get_env(rcvr)), - on-stop-request{state.stop_src}); - if (state.stop_src.stop_requested()) { - state.on_stop.reset(); - set_stopped(std::move(rcvr)); - } else { - (start(ops), ...); - } -} -The member impls-for::complete is initialized with a callable object equivalent to the following lambda expression: - -[]( - this auto& complete, Index, State& state, Rcvr& rcvr, Set, Args&&... 
args) noexcept -> void { - if constexpr (same_as) { - if (disposition::error != state.disp.exchange(disposition::error)) { - state.stop_src.request_stop(); - TRY-EMPLACE-ERROR(state.errors, std::forward(args)...); - } - } else if constexpr (same_as) { - auto expected = disposition::started; - if (state.disp.compare_exchange_strong(expected, disposition::stopped)) { - state.stop_src.request_stop(); - } - } else if constexpr (!same_as>) { - if (state.disp == disposition::started) { - auto& opt = get(state.values); - TRY-EMPLACE-VALUE(complete, opt, std::forward(args)...); - } - } - - state.arrive(rcvr); -} -where TRY-EMPLACE-ERROR(v, e), for subexpressions v and e, is equivalent to: - -try { - v.template emplace(e); -} catch (...) { - v.template emplace(current_exception()); -} -if the expression decltype(auto(e))(e) is potentially throwing; otherwise, v.template emplace(e); and where TRY-EMPLACE-VALUE(c, o, as...), for subexpressions c, o, and pack of subexpressions as, is equivalent to: - -try { - o.emplace(as...); -} catch (...) { - c(Index(), state, rcvr, set_error, current_exception()); - return; -} -if the expression decayed-tuple{as...} is potentially throwing; otherwise, o.emplace(as...). - -The expression when_all_with_variant(sndrs...) is expression-equivalent to: - -transform_sender( - CD(), - make-sender(when_all_with_variant, {}, sndrs...)); -Given subexpressions sndr and env, if sender-for is false, then the expression when_all_with_variant.transform_sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, _, ...child] = sndr; -return when_all(into_variant(std::forward_like(child))...); -This causes the when_all_with_variant(sndrs...) sender to become when_all(into_variant(sndrs)...) when it is connected with a receiver whose execution domain does not customize when_all_with_variant. - -34.9.11.12. 
execution::into_variant [exec.into.variant] -into_variant adapts a sender with multiple value completion signatures into a sender with just one value completion signature consisting of a variant of tuples. - -The name into_variant denotes a pipeable sender adaptor object. For a subexpression sndr, let Sndr be decltype((sndr)). If Sndr does not satisfy sender, into_variant(sndr) is ill-formed. - -Otherwise, the expression into_variant(sndr) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(into_variant, {}, sndr)) -except that sndr is only evaluated once. - -The exposition-only class template impls-for ([exec.snd.general]) is specialized for into_variant as follows: - -namespace std::execution { - template<> - struct impls-for<into_variant_t> : default-impls { - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - }; -} -The member impls-for<into_variant_t>::get-state is initialized with a callable object equivalent to the following lambda: - -[]<class Sndr, class Rcvr>(Sndr&& sndr, Rcvr& rcvr) noexcept - -> type_identity<value_types_of_t<child-type<Sndr>, env_of_t<Rcvr>>> { - return {}; -} -The member impls-for<into_variant_t>::complete is initialized with a callable object equivalent to the following lambda: - -[]<class State, class Rcvr, class Tag, class... Args>( - auto, State, Rcvr& rcvr, Tag, Args&&... args) noexcept -> void { - if constexpr (same_as<Tag, set_value_t>) { - using variant_type = typename State::type; - TRY-SET-VALUE(rcvr, variant_type(decayed-tuple<Args...>{std::forward<Args>(args)...})); - } else { - Tag()(std::move(rcvr), std::forward<Args>(args)...); - } -} -34.9.11.13. execution::stopped_as_optional [exec.stopped.as.optional] -stopped_as_optional maps a sender’s stopped completion operation into a value completion operation as a disengaged optional. The sender’s value completion operation is also converted into an optional. The result is a sender that never completes with stopped, reporting cancellation by completing with a disengaged optional. - -The name stopped_as_optional denotes a pipeable sender adaptor object. 
For a subexpression sndr, let Sndr be decltype((sndr)). The expression stopped_as_optional(sndr) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(stopped_as_optional, {}, sndr)) -except that sndr is only evaluated once. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). If sender-for is false, or if the type single-sender-value-type is ill-formed or void, then the expression stopped_as_optional.transform_sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, _, child] = sndr; -using V = single-sender-value-type; -return let_stopped( - then(std::forward_like(child), - [](Ts&&... ts) noexcept(is_nothrow_constructible_v) { - return optional(in_place, std::forward(ts)...); - }), - []() noexcept { return just(optional()); }); -34.9.11.14. execution::stopped_as_error [exec.stopped.as.error] -stopped_as_error maps an input sender’s stopped completion operation into an error completion operation as a custom error type. The result is a sender that never completes with stopped, reporting cancellation by completing with an error. - -The name stopped_as_error denotes a pipeable sender adaptor object. For some subexpressions sndr and err, let Sndr be decltype((sndr)) and let Err be decltype((err)). If the type Sndr does not satisfy sender or if the type Err doesn’t satisfy movable-value, stopped_as_error(sndr, err) is ill-formed. Otherwise, the expression stopped_as_error(sndr, err) is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(stopped_as_error, err, sndr)) -except that sndr is only evaluated once. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). 
If sender-for is false, then the expression stopped_as_error.transform_sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, err, child] = sndr; -using E = decltype(auto(err)); -return let_stopped( - std::forward_like(child), - [err = std::forward_like(err)]() mutable noexcept(is_nothrow_move_constructible_v) { - return just_error(std::move(err)); - }); -34.9.12. Sender consumers [exec.consumers] -34.9.12.1. this_thread::sync_wait [exec.sync.wait] -this_thread::sync_wait and this_thread::sync_wait_with_variant are used to block the current thread of execution until the specified sender completes and to return its async result. sync_wait mandates that the input sender has exactly one value completion signature. - -Let sync-wait-env be the following exposition-only class type: - -namespace std::this_thread { - struct sync-wait-env { - execution::run_loop* loop; // exposition only - - auto query(execution::get_scheduler_t) const noexcept { - return loop->get_scheduler(); - } - - auto query(execution::get_delegation_scheduler_t) const noexcept { - return loop->get_scheduler(); - } - }; -} -Let sync-wait-result-type and sync-wait-with-variant-result-type be exposition-only alias templates defined as follows: - -namespace std::this_thread { - template Sndr> - using sync-wait-result-type = - optional>; - - template Sndr> - using sync-wait-with-variant-result-type = - optional>; -} -The name this_thread::sync_wait denotes a customization point object. For a subexpression sndr, let Sndr be decltype((sndr)). If sender_in is false, the expression this_thread::sync_wait(sndr) is ill-formed. Otherwise, it is expression-equivalent to the following, except that sndr is evaluated only once: - -apply_sender(get-domain-early(sndr), sync_wait, sndr) -Mandates: - -The type sync-wait-result-type is well-formed. - -same_as> is true, where e is the apply_sender expression above. 
- -Let sync-wait-state and sync-wait-receiver be the following exposition-only class templates: - -namespace std::this_thread { - template - struct sync-wait-state { // exposition only - execution::run_loop loop; // exposition only - exception_ptr error; // exposition only - sync-wait-result-type result; // exposition only - }; - - template - struct sync-wait-receiver { // exposition only - using receiver_concept = execution::receiver_t; - sync-wait-state* state; // exposition only - - template - void set_value(Args&&... args) && noexcept; - - template - void set_error(Error&& err) && noexcept; - - void set_stopped() && noexcept; - - sync-wait-env get_env() const noexcept { return {&state->loop}; } - }; -} -template -void set_value(Args&&... args) && noexcept; -Effects: Equivalent to: - -try { - state->result.emplace(std::forward(args)...); -} catch (...) { - state->error = current_exception(); -} -state->loop.finish(); -template -void set_error(Error&& err) && noexcept; -Effects: Equivalent to: - -state->error = AS-EXCEPT-PTR(std::forward(err)); // see [exec.general] -state->loop.finish(); -void set_stopped() && noexcept; -Effects: Equivalent to state->loop.finish(). - -For a subexpression sndr, let Sndr be decltype((sndr)). If sender_to> is false, the expression sync_wait.apply_sender(sndr) is ill-formed; otherwise, it is equivalent to: - -sync-wait-state state; -auto op = connect(sndr, sync-wait-receiver{&state}); -start(op); - -state.loop.run(); -if (state.error) { - rethrow_exception(std::move(state.error)); -} -return std::move(state.result); -The behavior of this_thread::sync_wait(sndr) is undefined unless: - -It blocks the current thread of execution ([defns.block]) with forward progress guarantee delegation ([intro.progress]) until the specified sender completes. 
The default implementation of sync_wait achieves forward progress guarantee delegation by providing a run_loop scheduler via the get_delegation_scheduler query on the sync-wait-receiver’s environment. The run_loop is driven by the current thread of execution. - -It returns the specified sender’s async results as follows: - -For a value completion, the result datums are returned in a tuple in an engaged optional object. - -For an error completion, an exception is thrown. - -For a stopped completion, a disengaged optional object is returned. - -The name this_thread::sync_wait_with_variant denotes a customization point object. For a subexpression sndr, let Sndr be decltype(into_variant(sndr)). If sender_in is false, this_thread::sync_wait_with_variant(sndr) is ill-formed. Otherwise, it is expression-equivalent to the following, except sndr is evaluated only once: - -apply_sender(get-domain-early(sndr), sync_wait_with_variant, sndr) -Mandates: - -The type sync-wait-with-variant-result-type is well-formed. - -same_as> is true, where e is the apply_sender expression above. - -If callable is false, the expression sync_wait_with_variant.apply_sender(sndr) is ill-formed. Otherwise, it is equivalent to: - -using result_type = sync-wait-with-variant-result-type; -if (auto opt_value = sync_wait(into_variant(sndr))) { - return result_type(std::move(get<0>(*opt_value))); -} -return result_type(nullopt); -The behavior of this_thread::sync_wait_with_variant(sndr) is undefined unless: - -It blocks the current thread of execution ([defns.block]) with forward progress guarantee delegation ([intro.progress]) until the specified sender completes. The default implementation of sync_wait_with_variant achieves forward progress guarantee delegation by relying on the forward progress guarantee delegation provided by sync_wait. 
- -It returns the specified sender’s async results as follows: - -For a value completion, the result datums are returned in an engaged optional object that contains a variant of tuples. - -For an error completion, an exception is thrown. - -For a stopped completion, a disengaged optional object is returned. - -34.10. Sender/receiver utilities [exec.utils] -34.10.1. execution::completion_signatures [exec.utils.cmplsigs] -completion_signatures is a type that encodes a set of completion signatures ([async.ops]). - -[Example: - -struct my_sender { - using sender_concept = sender_t; - using completion_signatures = - execution::completion_signatures< - set_value_t(), - set_value_t(int, float), - set_error_t(exception_ptr), - set_error_t(error_code), - set_stopped_t()>; -}; - -// Declares my_sender to be a sender that can complete by calling -// one of the following for a receiver expression rcvr: -// set_value(rcvr) -// set_value(rcvr, int{...}, float{...}) -// set_error(rcvr, exception_ptr{...}) -// set_error(rcvr, error_code{...}) -// set_stopped(rcvr) --- end example] - -[exec.utils.cmplsigs] makes use of the following exposition-only entities: - -template - concept completion-signature = see below; - -template - struct indirect-meta-apply { - template class T, class... As> - using meta-apply = T; // exposition only - }; - -template - concept always-true = true; // exposition only -A type Fn satisfies completion-signature if and only if it is a function type with one of the following forms: - -set_value_t(Vs...), where Vs is a pack of object or reference types. - -set_error_t(Err), where Err is an object or reference type. 
- -set_stopped_t() - -template class Tuple, - template class Variant> - using gather-signatures = see below; -Let Fns be a pack of the arguments of the completion_signatures specialization named by Completions, let TagFns be a pack of the function types in Fns whose return types are Tag, and let Tsn be a pack of the function argument types in the n-th type in TagFns. Then, given two variadic templates Tuple and Variant, the type gather-signatures names the type META-APPLY(Variant, META-APPLY(Tuple, Ts0...), META-APPLY(Tuple, Ts1...), ... META-APPLY(Tuple, Tsm-1...)), where m is the size of the pack TagFns and META-APPLY(T, As...) is equivalent to: - -typename indirect-meta-apply>::template meta-apply; -The purpose of META-APPLY is to make it valid to use non-variadic templates as Variant and Tuple arguments to gather-signatures. - -namespace std::execution { - template - struct completion_signatures {}; - - template class Tuple = decayed-tuple, - template class Variant = variant-or-empty> - requires sender_in - using value_types_of_t = - gather-signatures, Tuple, Variant>; - - template class Variant = variant-or-empty> - requires sender_in - using error_types_of_t = - gather-signatures, type_identity_t, Variant>; - - template - requires sender_in - inline constexpr bool sends_stopped = - !same_as< - type-list<>, - gather-signatures, type-list, type-list>>; -} -34.10.2. execution::transform_completion_signatures [exec.utils.tfxcmplsigs] -transform_completion_signatures is an alias template used to transform one set of completion signatures into another. It takes a set of completion signatures and several other template arguments that apply modifications to each completion signature in the set to generate a new specialization of completion_signatures. 
- -[Example: - -// Given a sender Sndr and an environment Env, adapt the completion -// signatures of Sndr by lvalue-ref qualifying the values, adding an additional -// exception_ptr error completion if its not already there, and leaving the -// other completion signatures alone. -template - using my_set_value_t = - completion_signatures< - set_value_t(add_lvalue_reference_t...)>; - -using my_completion_signatures = - transform_completion_signatures< - completion_signatures_of_t, - completion_signatures, - my_set_value_t>; --- end example] - -[exec.utils.tfxcmplsigs] makes use of the following exposition-only entities: - -template - using default-set-value = - completion_signatures; - -template - using default-set-error = - completion_signatures; -namespace std::execution { - template, - template class SetValue = default-set-value, - template class SetError = default-set-error, - valid-completion-signatures SetStopped = - completion_signatures> - using transform_completion_signatures = - completion_signatures; -} -SetValue shall name an alias template such that for any pack of types As, the type SetValue is either ill-formed or else valid-completion-signatures> is satisfied. - -SetError shall name an alias template such that for any type Err, SetError is either ill-formed or else valid-completion-signatures> is satisfied. - -Then: - -Let Vs... be a pack of the types in the type-list named by gather-signatures. - -Let Es... be a pack of the types in the type-list named by gather-signatures, where error-list is an alias template such that error-list is type-list...>. - -Let Ss name the type completion_signatures<> if gather-signatures is an alias for the type type-list<>; otherwise, SetStopped. - -Then: - -If any of the above types are ill-formed, then transform_completion_signatures is ill-formed. - -Otherwise, transform_completion_signatures is the type completion_signatures where Sigs... 
is the unique set of types in all the template arguments of all the completion_signatures specializations in the set AdditionalSignatures, Vs..., Es..., Ss. - -34.11. Execution contexts [exec.ctx] -34.11.1. execution::run_loop [exec.run.loop] -A run_loop is an execution resource on which work can be scheduled. It maintains a thread-safe first-in-first-out queue of work. Its run() member function removes elements from the queue and executes them in a loop on the thread of execution that calls run(). - -A run_loop instance has an associated count that corresponds to the number of work items that are in its queue. Additionally, a run_loop instance has an associated state that can be one of starting, running, or finishing. - -Concurrent invocations of the member functions of run_loop other than run and its destructor do not introduce data races. The member functions pop_front, push_back, and finish execute atomically. - -Recommended practice: Implementations are encouraged to use an intrusive queue of operation states to hold the work units to make scheduling allocation-free. - -namespace std::execution { - class run_loop { - // [exec.run.loop.types] Associated types - class run-loop-scheduler; // exposition only - class run-loop-sender; // exposition only - struct run-loop-opstate-base { // exposition only - virtual void execute() = 0; // exposition only - run_loop* loop; // exposition only - run-loop-opstate-base* next; // exposition only - }; - template - using run-loop-opstate = unspecified; // exposition only - - // [exec.run.loop.members] Member functions: - run-loop-opstate-base* pop-front(); // exposition only - void push-back(run-loop-opstate-base*); // exposition only - - public: - // [exec.run.loop.ctor] construct/copy/destroy - run_loop() noexcept; - run_loop(run_loop&&) = delete; - ~run_loop(); - - // [exec.run.loop.members] Member functions: - run-loop-scheduler get_scheduler(); - void run(); - void finish(); - }; -} -34.11.1.1. 
Associated types [exec.run.loop.types] -class run-loop-scheduler; -run-loop-scheduler is an unspecified type that models scheduler. - -Instances of run-loop-scheduler remain valid until the end of the lifetime of the run_loop instance from which they were obtained. - -Two instances of run-loop-scheduler compare equal if and only if they were obtained from the same run_loop instance. - -Let sch be an expression of type run-loop-scheduler. The expression schedule(sch) has type run-loop-sender and is not potentially-throwing if sch is not potentially-throwing. - -class run-loop-sender; -run-loop-sender is an exposition-only type that satisfies sender. For any type Env, completion_signatures_of_t is: - -completion_signatures -An instance of run-loop-sender remains valid until the end of the lifetime of its associated run_loop instance. - -Let sndr be an expression of type run-loop-sender, let rcvr be an expression such that receiver_of is true where CS is the completion_signatures specialization above. Let C be either set_value_t or set_stopped_t. Then: - -The expression connect(sndr, rcvr) has type run-loop-opstate> and is potentially-throwing if and only if (void(sndr), auto(rcvr)) is potentially-throwing. - -The expression get_completion_scheduler(get_env(sndr)) is potentially-throwing if and only if sndr is potentially-throwing, has type run-loop-scheduler, and compares equal to the run-loop-scheduler instance from which sndr was obtained. - -template - struct run-loop-opstate; -run-loop-opstate inherits privately and unambiguously from run-loop-opstate-base. - -Let o be a non-const lvalue of type run-loop-opstate, and let REC(o) be a non-const lvalue reference to an instance of type Rcvr that was initialized with the expression rcvr passed to the invocation of connect that returned o. Then: - -The object to which REC(o) refers remains valid for the lifetime of the object to which o refers. 
- -The type run-loop-opstate overrides run-loop-opstate-base::execute() such that o.execute() is equivalent to: - -if (get_stop_token(REC(o)).stop_requested()) { - set_stopped(std::move(REC(o))); -} else { - set_value(std::move(REC(o))); -} -The expression start(o) is equivalent to: - -try { - o.loop->push-back(addressof(o)); -} catch(...) { - set_error(std::move(REC(o)), current_exception()); -} -34.11.1.2. Constructor and destructor [exec.run.loop.ctor] -run_loop() noexcept; -Postconditions: count is 0 and state is starting. - -~run_loop(); -Effects: If count is not 0 or if state is running, invokes terminate(). Otherwise, has no effects. - -34.11.1.3. Member functions [exec.run.loop.members] -run-loop-opstate-base* pop-front(); -Effects: Blocks ([defns.block]) until one of the following conditions is true: - -count is 0 and state is finishing, in which case pop-front returns nullptr; or - -count is greater than 0, in which case an item is removed from the front of the queue, count is decremented by 1, and the removed item is returned. - -void push-back(run-loop-opstate-base* item); -Effects: Adds item to the back of the queue and increments count by 1. - -Synchronization: This operation synchronizes with the pop-front operation that obtains item. - -run-loop-scheduler get_scheduler(); -Returns: An instance of run-loop-scheduler that can be used to schedule work onto this run_loop instance. - -void run(); -Precondition: state is starting. - -Effects: Sets the state to running. Then, equivalent to: - -while (auto* op = pop-front()) { - op->execute(); -} -Remarks: When state changes, it does so without introducing data races. - -void finish(); -Effects: Changes state to finishing. - -Synchronization: finish synchronizes with the pop-front operation that returns nullptr. - -34.12. Coroutine utilities [exec.coro.utils] -34.12.1. execution::as_awaitable [exec.as.awaitable] -as_awaitable transforms an object into one that is awaitable within a particular coroutine. 
[exec.coro.utils] makes use of the following exposition-only entities: - -namespace std::execution { - template - concept awaitable-sender = - single-sender> && - sender_to && // see below - requires (Promise& p) { - { p.unhandled_stopped() } -> convertible_to>; - }; - - template - class sender-awaitable; -} -The type sender-awaitable is equivalent to: - -namespace std::execution { - template - class sender-awaitable { - struct unit {}; // exposition only - using value-type = // exposition only - single-sender-value-type>; - using result-type = // exposition only - conditional_t, unit, value-type>; - struct awaitable-receiver; // exposition only - - variant result{}; // exposition only - connect_result_t state; // exposition only - - public: - sender-awaitable(Sndr&& sndr, Promise& p); - static constexpr bool await_ready() noexcept { return false; } - void await_suspend(coroutine_handle) noexcept { start(state); } - value-type await_resume(); - }; -} -awaitable-receiver is equivalent to: - -struct awaitable-receiver { - using receiver_concept = receiver_t; - variant* result-ptr; // exposition only - coroutine_handle continuation; // exposition only - // ... see below -}; -Let rcvr be an rvalue expression of type awaitable-receiver, let crcvr be a const lvalue that refers to rcvr, let vs be a pack of subexpressions, and let err be an expression of type Err. Then: - -If constructible_from is satisfied, the expression set_value(rcvr, vs...) is equivalent to: - -try { - rcvr.result-ptr->template emplace<1>(vs...); -} catch(...) { - rcvr.result-ptr->template emplace<2>(current_exception()); -} -rcvr.continuation.resume(); -Otherwise, set_value(rcvr, vs...) is ill-formed. 
- -The expression set_error(rcvr, err) is equivalent to: - -rcvr.result-ptr->template emplace<2>(AS-EXCEPT-PTR(err)); // see [exec.general] -rcvr.continuation.resume(); -The expression set_stopped(rcvr) is equivalent to: - -static_cast>(rcvr.continuation.promise().unhandled_stopped()).resume(); -For any expression tag whose type satisfies forwarding-query and for any pack of subexpressions as, get_env(crcvr).query(tag, as...) is expression-equivalent to tag(get_env(as_const(crcvr.continuation.promise())), as...). - -sender-awaitable(Sndr&& sndr, Promise& p); - -Effects: Initializes state with connect(std::forward(sndr), awaitable-receiver{addressof(result), coroutine_handle::from_promise(p)}). - -value-type await_resume(); - -Effects: Equivalent to: - -if (result.index() == 2) - rethrow_exception(get<2>(result)); -if constexpr (!is_void_v) - return std::forward(get<1>(result)); -as_awaitable is a customization point object. For subexpressions expr and p where p is an lvalue, Expr names the type decltype((expr)) and Promise names the type decay_t, as_awaitable(expr, p) is expression-equivalent to: - -expr.as_awaitable(p) if that expression is well-formed. - -Mandates: is-awaitable is true, where A is the type of the expression above. - -Otherwise, (void(p), expr) if is-awaitable is true, where U is an unspecified class type that is not Promise and that lacks a member named await_transform. - -Preconditions: is-awaitable is true and the expression co_await expr in a coroutine with promise type U is expression-equivalent to the same expression in a coroutine with promise type Promise. - -Otherwise, sender-awaitable{expr, p} if awaitable-sender is true. - -Otherwise, (void(p), expr). - -except that the evaluations of expr and p are indeterminately sequenced. - -34.12.2. execution::with_awaitable_senders [exec.with.awaitable.senders] -with_awaitable_senders, when used as the base class of a coroutine promise type, makes senders awaitable in that coroutine type. 
- -In addition, it provides a default implementation of unhandled_stopped() such that if a sender completes by calling set_stopped, it is treated as if an uncatchable "stopped" exception were thrown from the await-expression. The coroutine is never resumed, and the unhandled_stopped of the coroutine caller’s promise type is called. - -namespace std::execution { - template - struct with_awaitable_senders { - template - requires (!same_as) - void set_continuation(coroutine_handle h) noexcept; - - coroutine_handle<> continuation() const noexcept { return continuation; } - - coroutine_handle<> unhandled_stopped() noexcept { - return stopped-handler(continuation.address()); - } - - template - see below await_transform(Value&& value); - - private: - // exposition only - [[noreturn]] static coroutine_handle<> default-unhandled-stopped(void*) noexcept { - terminate(); - } - coroutine_handle<> continuation{}; // exposition only - // exposition only - coroutine_handle<> (*stopped-handler)(void*) noexcept = &default-unhandled-stopped; - }; -} -template - requires (!same_as) -void set_continuation(coroutine_handle h) noexcept; -Effects: Equivalent to: - -continuation = h; -if constexpr ( requires(OtherPromise& other) { other.unhandled_stopped(); } ) { - stopped-handler = [](void* p) noexcept -> coroutine_handle<> { - return coroutine_handle::from_address(p) - .promise().unhandled_stopped(); - }; -} else { - stopped-handler = &default-unhandled-stopped; -} -template -call-result-t await_transform(Value&& value); -Effects: Equivalent to: - -return as_awaitable(std::forward(value), static_cast(*this)); -Index -Terms defined by this specification -associated, in § 33.3.1 -asynchronous operation, in § 34.3 -asynchronous operation lifetime, in § 34.3 -async lifetime, in § 34.3 -async result, in § 34.3 -attributes, in § 34.3 -awaitable, in § 34.9.3 -callback function, in § 33.3.3 -caller, in § 34.3 -child operations, in § 34.3 -child sender, in § 34.3 -complete, in § 34.3 -completion 
function, in § 34.3 -completion operation, in § 34.3 -completion scheduler, in § 34.3 -completion signature, in § 34.3 -completion tag, in § 34.3 -connect, in § 34.3 -decay-copied from, in § 16 -disengaged, in § 33.3.3 -disposition, in § 34.3 -environment, in § 34.3 -error completion, in § 34.3 -execution resource, in § 34.3 -multi-shot sender, in § 4.7 -operation state, in § 34.3 -parent operation, in § 34.3 -parent sender, in § 34.3 -permissible completion, in § 34.9.2 -pipeable, in § 4.12 -piped, in § 4.12 -query, in § 34.2.1 -queryable object, in § 34.2.1 -query object, in § 34.2.1 -receive, in § 34.3 -receiver, in § 34.3 -schedule-expression, in § 34.3 -scheduler, in § 34.3 -schedule sender, in § 34.3 -send, in § 34.3 -sender, in § 34.3 -sender adaptor, in § 34.3 -sender algorithm, in § 34.3 -sender consumer, in § 34.3 -sender factory, in § 34.3 -single-shot sender, in § 4.7 -start, in § 34.3 -stoppable callback deregistration, in § 33.3.3 -stoppable callback registration, in § 33.3.3 -stopped completion, in § 34.3 -stop request, in § 33.3.1 -stop request operation, in § 33.3.3 -stop state, in § 33.3.1 -Strictly lazy submission, in § 5.5 -value completion, in § 34.3 -References -Informative References -[HPX] -Hartmut Kaiser; et al. HPX - The C++ Standard Library for Parallelism and Concurrency. URL: https://doi.org/10.21105/joss.02352 -[N4885] -Thomas Köppe. Working Draft, Standard for Programming Language C++. 17 March 2021. URL: https://wg21.link/n4885 -[P0443R14] -Jared Hoberock, Michael Garland, Chris Kohlhoff, Chris Mysen, H. Carter Edwards, Gordon Brown, D. S. Hollman. A Unified Executors Proposal for C++. 15 September 2020. URL: https://wg21.link/p0443r14 -[P0981R0] -Richard Smith, Gor Nishanov. Halo: coroutine Heap Allocation eLision Optimization: the joint response. 18 March 2018. URL: https://wg21.link/p0981r0 -[P1056R1] -Lewis Baker, Gor Nishanov. Add lazy coroutine (coroutine task) type. 7 October 2018. 
URL: https://wg21.link/p1056r1 -[P1895R0] -Lewis Baker, Eric Niebler, Kirk Shoop. tag_invoke: A general pattern for supporting customisable functions. 8 October 2019. URL: https://wg21.link/p1895r0 -[P1897R3] -Lee Howes. Towards C++23 executors: A proposal for an initial set of algorithms. 16 May 2020. URL: https://wg21.link/p1897r3 -[P2175R0] -Lewis Baker. Composable cancellation for sender-based async operations. 15 December 2020. URL: https://wg21.link/p2175r0 -[P2855R1] -Ville Voutilainen. Member customization points for Senders and Receivers. 22 February 2024. URL: https://wg21.link/p2855r1 -[P2999R3] -Eric Niebler. Sender Algorithm Customization. 13 December 2023. URL: https://wg21.link/p2999r3 -[P3149R3] -Ian Petersen, Ján Ondrušek; Jessica Wong; Kirk Shoop; Lee Howes; Lucian Radu Teodorescu;. async_scope — Creating scopes for non-sequential concurrency. 22 May 2024. URL: https://wg21.link/p3149r3 -[P3175R3] -Eric Niebler. Reconsidering the std::execution::on algorithm. 2024-06-24. URL: https://wg21.link/P3175R3 -[P3187R1] -Kirk Shoop, Lewis Baker. remove ensure_started and start_detached from P2300. 21 March 2024. URL: https://wg21.link/p3187r1 -[P3303R1] -Eric Niebler. Fixing Lazy Sender Algorithm Customization. 2024-06-24. URL: https://wg21.link/P3303R1 \ No newline at end of file diff --git a/context/research/p2762.md b/context/research/p2762.md deleted file mode 100644 index 97bdba4..0000000 --- a/context/research/p2762.md +++ /dev/null @@ -1,1251 +0,0 @@ -Sender/Receiver Interface For Networking -Document #: P2762R2 -Date: 2023-10-12 -Project: Programming Language C++ -Audience: Networking Study Group (SG4) -Library Evolution Working Group -Reply-to: Dietmar Kühl (Bloomberg) - -Contents -1 Revisions 2 -1.1 Changes for R2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 -1.2 Changes for R1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
2 -2 Motivation 2 -3 Related Work 3 -4 Design Choices 3 -4.1 Obtaining the Scheduler . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 -4.2 Error Reporting . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 -4.3 Member vs. Non-Member Operation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 -4.4 I/O Scheduler Interface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 -4.5 Timer Class or Just a Sender . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 -4.6 Higher Level Tools . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 -4.7 Sender Adaptors . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 -4.8 Awaitable Senders . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 -5 Cancellation Concern 9 -6 Discussion 9 -6.1 Support A system execution context . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10 -6.2 Resolution . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10 -6.3 Naming . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10 -6.4 Constraints On Used Scheduler . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -6.5 Required Features . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -6.5.1 Extensible Scheduler Interface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -6.5.2 TLS Support . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -6.5.3 Buffer Pools . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 -6.5.4 Async Streams . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
12 -6.5.5 Networking Algorithms . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 -7 Questions 12 -8 Wording for Networking CPOs 13 -9 Networking Senders [net.sender] 13 -9.1 General [net.sender.general] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 -9.2 Network Sender Operations Synopsis [net.sender.syn] . . . . . . . . . . . . . . . . . . . . . . . . . 14 -1 -9.3 Network Sender Operations [net.sender.operations] . . . . . . . . . . . . . . . . . . . . . . . . . . 15 -9.3.1 net::async_accept [net.sender.async.accept] . . . . . . . . . . . . . . . . . . . . . . . . . 15 -9.3.2 net::async_connect [net.sender.async.connect] . . . . . . . . . . . . . . . . . . . . . . . 17 -9.3.3 net::async_read_some [net.sender.async.read.some] . . . . . . . . . . . . . . . . . . . . . 18 -9.3.4 net::async_receive [net.sender.async.receive] . . . . . . . . . . . . . . . . . . . . . . . . 19 -9.3.5 net::async_receive_from [net.sender.async.receive.from] . . . . . . . . . . . . . . . . . . 20 -9.3.6 net::async_send [net.sender.async.send] . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 -9.3.7 net::async_send_to [net.sender.async.send.to] . . . . . . . . . . . . . . . . . . . . . . . . 24 -9.3.8 net::async_wait [net.sender.async.wait] . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 -9.3.9 net::async_write_some [net.sender.async.write.some] . . . . . . . . . . . . . . . . . . . . 27 -9.4 Network Algorithms [net.algorithms] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 -9.4.1 General [net.algorithms.general] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 -9.4.2 net::async_resolve_name [net.sender.async.resolve.name] . . . . . . . . . . . . . . . . . 28 -9.4.3 net::async_resolve_address [net.sender.async.resolve.address] . . . . . . . . . . . . . . 
28 -This document proposes the addition of senders for asynchronous networking operations to the Networking TS -and, ultimately, to the C++ Standard. As the std::execution proposal hasn’t landed yet, this proposal is -kept at a high level and is primarily intended to discuss what a potential interface for asynchronous networking -operations could look like. -1 Revisions -1.1 Changes for R2 -— Added using a socket’s context to the discussion on Obtaining the Scheduler. -— Added discussion of the requirement that the scheduler interface needs to be extensible for non-standard -operations. -— Added discussion of awaitable senders to be more friendly to coroutines. -1.2 Changes for R1 -— Added a section with questions to aggregate decisions to poll (and once polled, record the respective -outcome). -— Clarified in the motivation why the Networking TS is targeted. -— Added some explanation why getting the scheduler from a receiver isn’t entirely the obvious choice (at the -end of Obtaining the Scheduler). -— Added a section with discussions to capture topics which were brought up and provide suitable answers -and changes. -— Added a section with Network Algorithms [net.algorithms] for resolution. -— Added a section Sender Adaptors to explain why the networking senders should be sender adaptors. -— Enhanced the wording section and added async_wait. -2 Motivation -The std::execution proposal (P2300) proposes sender/receiver as a general framework for structured and -composable concurrency. The currently proposed components define a framework primarily targeted at concurrent -execution within a program. If this framework gets adopted, it should be possible to integrate other asynchronous -work like networking. To facilitate such integration, it is necessary to define a suitable set of senders for the -relevant asynchronous network operations. -The proposed changes augment the Networking TS rather than defining entirely new networking components. 
-The main dispute over the Networking TS is the model for asynchronous execution. The vocabulary used to -interact with networking facilities like addresses, sockets, protocols, etc. doesn’t need to be replaced entirely to -support sender/receiver. The sender/receiver capabilities can be added, which is what the current proposal does. -2 -If the current direction of having only one model for asynchronous operations is confirmed it will need to also -remove the executor-based facilities. -3 Related Work -The components defined by P2300 provide a complete framework for managing asynchronous operations and -no other facilities beyond senders for managing the networking operations are needed. The Networking TS -defines its own framework for asynchronous operations. This paper does not propose the removal of the other -framework; whether the asynchronous framework from the Networking TS should be retained or removed is a -separate discussion. -There is a proposal for Standard Secure Networking (P2586). The current proposal consists of a high level -description and a few possible usage examples. Based on the usage example, this proposal does not include -any binding to an asynchronous system. At most, it gets to the question on whether a coroutine interface -should be provided to its “poll” facility. So far I haven’t created a binding of the interfaces in this proposal -to the facilities proposed by P2586 but I don’t think there would be any problem doing so. From the current -document, it isn’t clear to me whether an active “poll” can be interrupted to add new work. Whether it would -be a reasonable implementation choice to use P2586 as the base implementation isn’t quite clear, as it seems -beneficial to potentially use a completion interface, e.g., io_uring, directly. -The focus of P2586 is secured networking and I haven’t managed to experiment with a secured version of the -proposed networking senders, yet. 
-P2586 makes some claims about allocations needed for a design based on P2300; there is actually no need to -do any allocations at all! The current experimental implementation (it is part of my experimental standard -library) doesn’t use any allocations in the networking senders (unless variable sized scatter/gather buffers are -specified in a way incompatible with an array of iovec). When using poll() to wait for activity, currently a -std::vector<::pollfd> and a std::vector of completions are used. However, it would be easily possible to -specify an interface to a suitable I/O context providing control over the maximum size of these arrays and their -required memory to avoid any allocations -4 Design Choices -The basic interface of the senders for the asynchronous network operations is informed by the Networking TS: -the available operations and their arguments will be similar. Even so there are some design choices. In most -cases, the alternatives aren’t exclusive and multiple variations can be supported to support different uses. -The sample code for the different considerations concentrates on the respective choice being considered. As other -design choices may affect the resulting code, one of the corresponding options is picked. The different design -choices are mostly orthogonal, although some of the choices (notably whether the operations should be member -functions) may limit the possibilities for other considerations. -4.1 Obtaining the Scheduler -The networking operations need a suitable context dealing with the asynchronousity, i.e., something using -poll(2), epoll(2), kqueue(2), io_uring(2), completion ports, etc., to schedule the operation. The context is -abstracted by a scheduler capable of scheduling the respective networking operations. There are a few options -for how the scheduler can be obtained: -1. The operation is used as a sender factory and the scheduler is passed in as an argument. 
This approach
-makes the scheduling explicit when creating the asynchronous operation, e.g.:
-auto make_accept(auto scheduler, auto& socket) {
-return async::accept(scheduler, socket);
-}
-3
-2. The operation is used as a sender adapter and the scheduler is obtained using
-get_completion_scheduler(s) from the upstream sender, e.g.:
-auto make_accept(auto scheduler, auto& socket) {
-return schedule(scheduler)
-| async::accept(socket)
-;
-}
-3. The operation is used as a sender factory and the scheduler is obtained using get_scheduler(get_env(r))
-from the downstream sender. This approach allows imbuing a work graph with a scheduler specified from
-the usage end, e.g.:
-auto make_accept(auto scheduler, auto& socket) {
-return on(scheduler, async::accept(socket));
-}
-4. It was pointed out networking objects like sockets are specific to a context: that is necessary when using
-I/O Completion Ports or a TLS library and yields advantages when using, e.g., io_uring(2). Explicitly
-using a separate scheduler for the networking operation could lead to mismatches detected only at run-time.
-It may, thus, be reasonable to schedule networking entirely based on the objects operated on:
-auto make_accept(auto& socket) {
-return async::accept(socket);
-}
-The most useful of these options seems to be the third one, i.e., injecting the used scheduler from the point
-where the asynchronous work is actually used. The other two options require knowledge of the scheduler while
-building up the asynchronous work.
-As a potential variation of the second and third option, a specific customization point name, e.g.,
-get_completion_io_scheduler or get_io_scheduler, could be used. Using different names could enable the separation of the I/O scheduler from schedulers dedicated to doing work. These queries could fall back
-to the respective non-specific queries when not provided.
-Unfortunately, getting the scheduler from the usage side has an implication on how the work graph is built: for -some schedulers, the various operations may need to schedule additional asynchronous operations. For example, -a receive operation using TLS may need to set up a way of sending and receiving bytes from a lower level -scheduler. When the scheduler is injected through the receiver the operation and used scheduler are brought -together rather late, i.e., when connect(sender, receiver)ing and when the work graph is already built. -For the examples below, the third option is assumed. However, all three options are viable candidates. -4.2 Error Reporting -For the Networking TS, errors of asynchronous operations are reported using an std::error_code argument as -part of the completion signature. As there is exactly one completion function used, there isn’t really a different -alternative. Using receivers supports multiple completion functions, thereby allowing multiple choices: -1. The operation could complete using one set_value call using the same fused completion consisting of an -std::error_code and the other completion arguments as the Networking TS does, e.g.: -auto sender -= async::read(socket, buffer) -| then([](error_code const& ec, int n) { ... } -; -Within a coroutine, structured binding could be used to decompose the result, e.g.: -4 -auto[ec, n] = co_await async::read(socket, buffer); -2. As the error path is different in the completing functions, it can be reasonable to call different set_value -functions: one with an std::error_code argument and another one (or even multiple ones) with the -arguments for the success case. This approch wouldn’t work with coroutines as these are restricted to -using just one completion signature. 
Also, a downstream sender would need an overloaded set_value to -deal with the result: -auto sender -= async::read(socket, buffer) -| overload( -[](int n){ /* success path */ }, -[](error_code const& ec){ /* error path */ } -) -; -3. Similar to the previous alternative but instead of reporting errors using the set_value channel using the -set_error channel, e.g.: -auto sender = async::read(socket, buffer) -| then([](int n) { /* success path */ } -| upon_error([](error_code const& ec) { /* error path */ } -; -While this approach works with coroutines, it would end up using exceptions, e.g.: -try { -int n = co_await async::read(socket, buffer); -// success path -} catch (error_code const& ec) { -// error path -} -However, when using coroutines, it would be possible to use a generic algorithm fusing the set_value and -the set_error results back into one set_value result to avoid an exception. -4. There may even be space for a combination of reporting some errors using set_value while reporting -others using set_error, depending on the severity of the error. -5. With senders getting composed in a structured form, it may be reasonable to offer passing a reference to an -std::error_code and populating that when present and otherwise reporting the error on the set_error -channel. That would be similar to the synchronous networking operations of the Networking TS reporting -errors through the passed argument or an exception. This approach would work reasonably well using -coroutines: -error_code ec; -int n = co_await async::read(socket, buffer, ec); -if (!ec) -/* success path */; -else -/* error path */ -The most basic variations seems to use a combination of set_value for the successful case and set_error for -the failure cases; the other combinations can be build from that. Also, recognizing an error can be used by -algorithms to decide continuing differently upon error, e.g., cancelling other operation for a when_all. 
-However, some of the error cases may have been partial successes. In that case, using the set_error channel
-taking just one argument is somewhat limiting. On the other hand, when substantial work is done and partial
-5
-successes become reasonable, it is likely that intermediate results are to be produced and algorithms of a different
-shape are used anyway.
-When using asynchronous operations within a coroutine, there is only one set_value supported which can, however, return multiple values using a std::tuple, that is then likely decomposed using structured binding. That
-is, when using coroutines, defining different set_value channels isn't an option. For a coroutine, the set_error
-channel would be turned into an exception. With variations of the asynchronous operations taking an optional
-std::error_code reference as an argument, the coroutine experience would be similar to the synchronous code.
-Likewise, a coroutine-friendly version of the operations can be provided.
-It is possible to offer a combination of the different options. The design choice would name the operations (or the
-namespace they live in) appropriately. The examples here assume using set_value and set_error for success
-and error handling.
-4.3 Member vs. Non-Member Operation
-The Networking TS uses both member and non-member functions for its operations. Member functions are what
-users are used to from other languages, where there often aren't different options. The problem is compounded
-by many IDEs providing simple name completions for member functions. For example:
-auto sender = socket.async_read(buffer)
-| then([](auto&&...){ /* use result */ })
-;
-Similarly, when using coroutines:
-auto[ec, n] = co_await socket.async_read(buffer);
-On the other hand, CPOs can't be member functions (well, CPOs are classes with function call member functions
-but they don't really look like member functions and they aren't a member of some entity operated on).
Also, -adding members to classes tends to lead to “kitchen sink” classes acquiring ever more operations over time -(see, e.g., std::basic_string). The potentially fairly large number of variations (see other design choices) -is probably easier managed using non-member function. For example, there may be groups of operations in -different namespaces based on their intended use, e.g., async for the senders directly used to chain operations -and coro for senders used within coroutines. -auto sender = async::read(socket, buffer) -| then([](auto&&...){ /* use result */ } -; -Similarly, when using coroutines: -auto[ec, n] = co_await coro::read(socket, buffer); -As there is generally no entity used with the P2300 algorithms, these aren’t member of classes. For the network -operations there is the socket providing an entity and the operations could be defined as member functions of -these. Using non-member names providing the full variation of options doesn’t exclude using member functions -for the expected likely use cases, probably just delegating to the respective non-member operations. The examples -here don’t use member functions. -4.4 I/O Scheduler Interface -The networking (or, more general, I/O) operations will require being scheduled on a special context and being -run on a corresponding scheduler. Also, it is likely desirable to support different schedulers, e.g., one using the -most efficient real implementation, one being friendly to integration with other language’s “run loop”, and one -allowing unit testing of networking operations. There are multiple options for how the networking operations -are talking to the scheduler: -6 -1. The networking operations and the scheduler use a secret channel. While that is probably the easiest to -specify, it means that the networking operations can’t accept a somehow adapted scheduler or there needs -to be a protocol for how to extract the underlying scheduler. -2. 
Expose/abstract the various I/O operations somehow, possibly using virtual functions or, more likely, -CPOs. While this approach is probably more generic, the interface to the operations is likely at a somewhat -lower level than what the senders use and it is possibly platform specific. For example, a read_some -operation used with io_uring(2) needs to provide a pointer to an iovec which needs to stay around -until the operation is consumed from the completion ring buffer. That is a rather different interface than -the generic buffers passed to read_some. It may be possible to define the scheduler interface such that it -defines what the caller has to store until completion but I haven’t tried implementing this approach, yet. -3. The scheduler interface may model multiple contracts (one for each support I/O operation) and each -operation produces an object which gets embededded into the I/O operation’s operation state object. -Each supported underlying I/O interface could store its data in exactly the form needed. -4. The scheduler interface for I/O operations may be what is being proposed by the Low level file i/o library. -I haven’t tried to implement that. -5. The Networking TS may be doing something in that area and it may be possible to integrate with that or, -at least, do something similar. I haven’t tried to implement that. It seems the io_context uses a secret -interface. -6. Aside from networking there are plenty of other events which could be waited for: other forms of I/O, -process termination, signals, etc. Ideally, it should be possible to wait for all of these things using just one -thread. Not all of the operations are going to be standardized (at least, not initially) and it is necessary -to somehow allow applications to expand the support and also wait for additional events. -Most likely, it is preferable to have some form of I/O scheduler abstraction than using a secret interface. 
However, -it isn’t yet clear how such an interface would actually look like. -4.5 Timer Class or Just a Sender -The Networking TS defines a basic_waitable_timer class template. The type of this class encodes various -timer properties like the underlying clock type and some wait traits. The primary need in the Networking TS -for this class is the need for an entity to trigger cancellations: while operations are cancellable the cancellation -needs to be explicitly wired up where necessary. Uses would look like -waitable_timer timer(/* timer settings */); -auto sender = wait_for(timer, 5s); -or using coroutines -co_await wait_for(timer, 5s); -When using sender/receiver cancellation and its necessary wiring is handled by the senders capable of cancelling -operations by appropriate use of the receiver’s stop token. Correspondingly, there isn’t really a need for a timer -class. Having to create a timer object and keeping it around is sometimes a bit annoying. Thus, it may be -reasonable to allow defining timers simply by creating a suitable sender which is then scheduled on a suitable -scheduler. The I/O schedulers are capabable of executing timers. -auto sender = wait_for(5s); -co_await wait_for(5s); -As with most of the other design choices it may be reasonable to support both alternatives: sometimes it may -be reasonable to just schedule a timed operation without the need for an object and specifying the required -properties when doing so works. In other situations it may be preferable to encapsulate the timer properties -into an object and using this object to schedule multiple timed operations. In that situation it may be possible -to define the timed operations in a way which doesn’t require the timer entity to stay alive until the timed -operation completes: removing the need for the timer entity to remain valid until the timed operation completes -should make their use simpler. The actual timed operation would be maintained by the scheduler. 
-7 -4.6 Higher Level Tools -The basic networking or I/O operations are fairly straightforward and there are actually not that many of -them (the io_uring operations are probably a good indication of the overall scope including operations beyond -networking). Concentrating on networking operations the Networking TS doesn’t provide everything io_uring -does. Beyond the basic operations provided by the underlying system, the networking operations can reasonably -be composed into higher level algorithms. For example, an async::read_some operation potentially reading -partial buffers successfully can be composed into an async::read operations always reading a complete buffer -or failing. The question is, what algorithms should be included in the proposal, if any? -Algorithms like async::read and async::write are somewhat obvious examples. Something like an -async::resolve could be an example of a rather non-trivial algorithm: there is the synchronous -getaddrinfo(3) function, but there doesn’t seem to be an asynchronous alternative. With an asynchronous -framework in place, it seems reasonable to include an asynchronous version of getaddrinfo(3). -Beyond sender algorithms, there may also be some other interesting components: -— It may be useful to have a coroutine task (io_task) injecting a scheduler into asynchronous networking -operations used within a coroutine together with a suitable scope (io_scope) similar to async_scope but -also tied to some I/O scheduler. The corresponding task class probably needs to be templatized on the -relevant scheduler type. -— For full-duplex operation of a socket, i.e., scheduling concurrent reading and writing operation, something -like a ring buffer with sender interfaces to the production and consumption of buffers seems useful. -There are probably various other useful algorithms and components. 
-4.7 Sender Adaptors
-The senders for the networking operations could either be sender factories or sender adapters (see [async.ops]
-for a definition). Defining the networking operations as sender adaptors has the advantage that customization
-preferences, e.g., an allocator to be used for variable-sized scatter/gather arguments, can easily be specified.
-Also, it makes it easier to chain operations. To further facilitate chaining, the networking senders are specified
-to be pipeable sender adaptors [exec.adapt.objects].
-As it is probably quite common that a networking operation starts a work graph they are defined to also accept
-all arguments directly as a convenience. For example, the following senders have the same effect when started:
-auto s0 = execution::just(buffer) | net::async_read_some(socket);
-auto s1 = net::async_read_some(execution::just(buffer), socket);
-auto s2 = net::async_read_some(socket, buffer);
-This approach works nicely when the operation takes arguments which can be provided by the upstream sender.
-Specifically, there is a problem because async_accept only takes the acceptor socket as argument which is
-expected to be passed directly. Thus, the respective operations would look like this:
-auto s0 = execution::just() | net::async_accept(acceptor);
-auto s1 = net::async_accept(execution::just(), acceptor);
-auto s2 = net::async_accept(acceptor);
-To have both s0 and s2 work as senders, the result of net::async_accept(acceptor) needs to be both a
-sender and a sender adaptor closure. However, the specification in [exec.adapt.objects] p2 explicitly prohibits
-that. There are some options to deal with the case:
-1. async_accept doesn't create a pipeable sender adaptor and a different approach needs to be used to
-depend on the completion of prior work, e.g., s1.
-2. 
The two calls are distinguished by passing a tag argument to one or both of the alternatives, e.g.: -auto s0 = execution::just() | net::async_accept(await_sender, acceptor); -8 -3. Deal with case special and allow net::async_accept(acceptor) to be both a sender and a sender adaptor -closure somehow. -4.8 Awaitable Senders -While senders can be turned into awaiters using a promise type’s await_transform, there are some drawbacks -to this approach: -1. When the I/O operation wouldn’t block, e.g., because some data could be immediately read or written, -it could be detected in await_ready() that this is the case. For example, await_ready() could attempt -whether a non-blocking operation succeeds and, if so, signal that the awaiter is ready. When wrapping a -sender, doing the same isn’t quite possible. -2. While the standard library can make sure that all coroutines provided are sender-aware, user-defined -coroutines would need to explicitly support senders, too. If the networking sender (or possibly even -senders in general) were coroutine aware and defined an operator co_await() or implemented the awaiter -interface, they would be readily usable in any coroutine. -It seems reasonable to specify that the networking senders are awaitables. Having them be awaiter directly -would effectively mean that they already include significant state. Making them awaitable would effectively -allow creation of an operation state as the result of a call to operator co_await() which is a bit tailored to -the needs of coroutines. The same may be true for other standard library senders, too. -5 Cancellation Concern -The networking operations are generally inactive after the operation was started but the network operation -hasn’t completed yet. To cancel such an operation, it is necessary to actively trigger some cancellation function, -i.e., a simple test of an atomic bool provided by a stop token generally won’t work. 
Thus, the various operations -need to register a callback with the receiver’s stop token. In cases where the stop token isn’t no_stop_token, -this registration needs to do some synchronization both for the registration and the deregistration of the callback. -Repeatedly doing that operation while processing data on a socket may be a performance concern. -It may be possible to avoid setting up cancellation for individual operations and rather hook the cancellation -once to a suitable entity like a socket. The corresponding approaches will need some level of support by the -library. For example, it may be necessary to support calls to cancel some operations on a socket. -Even when doing so, it may be necessary to inhibit registration of callbacks with a stop token. For example, -when_all will use receivers using a different stop token than no_stop_token with its various senders. In that -case it may be useful to have a sender adapter which passes through all completion signals received but always -exposes a no_stop_token to its sender. -Some systems already have some cancellation support for common use cases. For exmaple, io_uring(2) supports timeouts for its operations (using IORING_OP_LINK_TIMEOUT). To easily tap into this pattern, it may -be reasonable to have a corresponding timeout sender taking a time and sender which may be a networking -operation: if either the time expires or the sender completes, the respective other operation would be cancelled. -Where available, the timeout could then just setup the underlying system to provide the corresponding functionality. Otherwise, it would behave like a timer and another sender given to a when_any operation completing -appropriately upon completion of the first operation and cancelling the other via a stop token. -This area still needs some experimentation and, I think, design. 
The general direction of encapsulating the
-cancellation into the asynchronous operations is rather interesting, but it isn't a priori clear how to avoid
-potential costs.
-6 Discussion
-This section aggregates topics discussed that aren't covered elsewhere in the document. It isn't intended to
-record the actual discussion we had but rather to cover how to address the respective concerns.
-9
-6.1 Support A system execution context
-During the SG4 discussion at Issaquah (2023-02-09), support for a system execution context, e.g., based on
-Grand Central Dispatch/libdispatch was discussed. It should be straight forward to expose any of the networking operations using such a context assuming the underlying facilities can support the respective operations.
-When looking at creating a context using Grand Central Dispatch/libdispatch to implement the networking
-operations, I could only find operations for asynchronous reading and writing but no connection management
-(accept/connect/shutdown).
-The work on the respective facilities should be suitably coordinated with the people proposing a system execution
-context.
-6.2 Resolution
-DNS offers name resolution as well as reverse look-up. Name resolution is important as explicit use of IP
-addresses should be avoided. Name resolution can be implemented as an algorithm using other networking
-operations.
-It was suggested to implement name resolution in terms of libcares. This library operates in terms of readiness
-indication of sockets specified via the native handles. Based on the readiness indication it directly uses OS level
-network functionality to send or receive messages. In that sense, an implementation using libcares isn't an
-algorithm on top of other networking operations.
-An implementation of resolution using other networking operations is probably preferable as the various operations
-could be customized in some form.
For example, to support testing, a context implementation could respond
-with predictable results replicating various scenarios including error scenarios which may be harder to construct
-based on a real implementation. However, an implementation using, e.g., libcares should be viable. To support
-an implementation using libraries requiring a readiness indication, an async_wait() operation is added.
-The scope of the specified resolution interfaces is limited to simple name and address look-up. For example,
-DNS supports responses yielding multiple results. For the initial specification, only look-ups by name and address (reverse look-up) are supported and in both cases only at most one result is considered. More advanced
-functionality can be added by separate proposals.
-Most likely sequence senders using set_next() to yield multiple results before completing should be used.
-However, there is currently (2023-09-01) no proposal for sequence senders.
-Implementations should probably be encouraged to use secured DNS approaches where possible. Whether doing
-so is actually viable still needs to be researched.
-6.3 Naming
-At the Issaquah SG4 discussion (2023-02-09), it was suggested to drop the async_ prefix from the names of the
-networking operations. Instead, the blocking operation should get a prefix like blocking_ or sync_ as these
-operations should be rarely used and the "good" name should be reserved for the commonly used alternative.
-The current naming scheme follows the names from the Networking TS. Renaming the operations should be
-straight forward if that's the decision.
-As usual, there are plenty of options how to name things. When considering naming, here are a few alternatives
-to consider:
-1. The current approach is to use the operation with/without prefix.
-2. Both the asynchronous and the blocking approach could get a prefix to always clearly indicate what is
-being used.
-3. 
Instead of using prefixes, the operations could be in different namespaces assuming the operations are nonmembers. This alternative may be especially reasonable if there is also a special variation for the use with
-coroutines.
-10
-4. One set of operations could be member functions while the others are non-member functions. As the
-customization point approach of sender/receiver lends itself better to non-member functions the nonmember use could be asynchronous and the member use could be synchronous.
-5. To further the asynchronous approach the standard library could choose not to provide any synchronous
-interface at all! Instead, users wanting to use the operations synchronously would use sync_wait(sender)
-also clearly indicating that the operation would be blocking (although this exact alternative probably yields
-a result too annoying to use and a custom function would be in order).
-Ideally, the choice of naming isn't changed every time the proposal is discussed: once a naming scheme is chosen,
-it should be kept unless new information emerges which proves the current choice unsuitable.
-6.4 Constraints On Used Scheduler
-The various socket types are created with a specific context. Executing any of the operations on a scheduler
-not referring to the corresponding context is an error. While the underlying system may not associate a specific
-context with any socket, the C++ implementation may require additional information for its operation.
-In the past the normal approach to similar misuse, e.g., comparing iterators from different containers of the
-same type, was to treat it as undefined behavior. In the context of networking operations it seems the misuse
-can be detected at reasonable cost and can be turned into an error, instead. Pursuing this direction the actual
-error should probably be specified, e.g., in form of a suitable error code and error category combination.
-6.5 Required Features
-During discussions of this proposal various features were mentioned. While this proposal mostly targets adding
-sender-versions of the operations in the Networking TS (and in its current form it is quite complete at doing so)
-it seems there are other features also required to make the result an acceptable addition to the C++ standard.
-This section provides a brief outline of various features mentioned in preparation to poll how to classify them:
-— required: without the feature networking shall not be standardized
-— optional: it would be great to have it but it is viable to standardize networking without it
-— separate: the feature should be added by a separate proposal
-6.5.1 Extensible Scheduler Interface
-It was stated that a scheduler interface for networking operations needs to be defined. The Networking TS doesn't
-do so. Having a defined interface should allow extending the implementation by custom implementations. For
-example, a test implementation where network operations and their errors are simulated to test applications
-using it is rather useful for unit tests. Likewise, a logging implementation which behaves like a filter and logs
-the observed operations in some detail would be good. There are probably other use cases.
-The details of such an interface are currently still unclear. Most likely the chosen implementation shouldn't
-actually affect the types of interfaces as otherwise everything using networking operations would need to be
-templatized on the scheduler interface. In any case definition of such an interface will require design work.
-6.5.2 TLS Support
-Secured network communication is rather important. TLS support can be added to the implementation by
-binding a library like openssl (this is an example and there are other implementations). Especially, if an
-extensible scheduler interface is defined it is viable to implement secured networking using this facility.
-If TLS is to be defined there are different choices.
Using, for example, openssl it is fairly straight forward to use
-a readiness indicator (async_wait) to make the operations non-blocking. However, the resulting implementation would directly use an OS-level interface like recvmsg/sendmsg to interface the network rather than using
-async_receive/async_send. In return the result may be more efficient.
-It can be argued that binding a library like openssl isn't entirely trivial and shouldn't be left to the user.
-11
-6.5.3 Buffer Pools
-When having many connections awaiting data it can be wasteful if each connection needs to provide a
-read buffer for data to be received. Instead, a buffer pool could be specified, possibly using just one buffer
-shared between thousands of connections. System interfaces using completion signals like io_uring support
-special operations such that the buffer is obtained by the kernel from a pool, making it desirable to expose the
-functionality. There is currently no design for buffer pools.
-Of course, if there are many clients the application is probably facing the Internet and a secured connection is
-used. In that case the state needed to maintain a TLS connection is probably already significantly contributing
-to the per connection data and sharing the buffer may not make a huge difference.
-6.5.4 Async Streams
-Defining basic asynchronous networking operations should, in general, allow creation of reasonable networking
-aware applications. To actually make the creation easy an interface for asynchronous streams may be needed.
-What the interface for such streams would look like is a separate question. To me even the exact objective isn't
-even clear. There is currently no design for async streams.
-6.5.5 Networking Algorithms
-It was stated that networking algorithms are needed. Hopefully, the available operations allow reasonably
-easy implementation of relevant algorithms.
Of course, not every user should be required to write all relevant -algorithms raising the questions: what are the relevant algorithms and which of these should be supported? -Here is a probably incomplete list: -— resolve_name(name): resolve name into an address/endpoint or a sequence of addresses/endpoints. -— resolve_address(address): obtain a name or a list of names for address (reverse resolution). -— async_read/async_write: like async_read_some/ async_write_some but process the entire available -buffer. There is a bit of design for these algorithms to deal with partial results and progress: maybe using -a sequence sender (for which there is no paper, yet) would be reasonable. -— timeout(duration, sender): essentially when_any(resume_after(duration), sender) although probably with a nicer interface to consume an upstream sender and produce nicer to use results (although -when_any isn’t proposed by the std::execution proposal). Also, in some situations there are more effective ways to implementation a time out. -— async_connect(socket, endpoints): like async_connect but use a sequence of endpoints, trying to -connect each one in turn until the connection succeeds. -— async_connect(socket, name): like async_connect(socket, endpoints) but with the sequence of endpoints obtained from name resolution of name rather than a list of readily resolved endpoints. -Which other algorithms are required? -7 Questions -— Should networking support only a sender/receiver model for asynchronous operations, i.e., should the -Networking TS’s executor model be removed? -— How should the asynchronous networking operation be named? See the Naming section for discussion. -— Should misuse, e.g., executing an operation on a scheduler not matching a socket’s context, be an error or -undefined behavior? -— What features are necessary (see above for some details)? 
-— Extensible scheduler interface -— TLS support -— Buffer pools -— Async streams -12 -— resolve_name -— resolve_address -— async_read/async_write -— timeout -— connect(socket, endpoints) -— connect(socket, name) -8 Wording for Networking CPOs -In 14.2 [io_context.io_context], add a scheduler_type and a get_scheduler() method to the synopsis: -namespace std { -namespace experimental { -namespace net { -inline namespace v1 { -class io_context : public execution_context -{ -public: -... -class executor_type; -class scheduler_type; -... -executor_type get_executor() noexcept; -scheduler_type get_scheduler() noexcept; -... -}; -} -} -} -} -In 14.2 [io_context.io_context] after paragraph 2, add a new paragraph: -2 count_type is an implementation-defined unsigned integral type of at least 32 bits. -3 scheduler_type is a type modelling scheduler [exec.sched]. -In 14.2.1a [io_context.io_context.members] after paragraph 3 add a new paragraph: -executor_type get_executor() noexcept; -3 Returns: An executor that may be used for submitting function objects to the io_context. -scheduler_type get_scheduler() noexcept; -4 Returns: A scheduler that may be used for submitting sender objects to the io_context. -Add a new section for the networking operations: -9 Networking Senders [net.sender] -9.1 General [net.sender.general] -1 Subclause [net.sender] defines sender adaptors [async.ops] for networking operations. When the corresponding -operations are started, they do not block any thread. Instead they complete once the corresponding operation -becomes ready. How the system determines that an operation is ready is implementation specific. -2 These sender adaptors share some common behavior: -13 -(2.1) — The specification for sender adaptors [exec.adapt.general] applies to the sender adaptors in [net.sender]. -The sender adaptor in [net.sender] are pipeable sender adaptors [exec.adapt.objects]. 
-(2.2) — When a sender s is connected [exec.connect] to a receiver r and the resulting operation state is -execution::started [exec.opstate.start], a callback for cancellation is registered with the stop token -obtained using get_stop_token(get_env(r)). When this callback is invoked the corresponding operation -is cancelled and one of the completion signatures is invoked in a timely manner. The operation can -complete using set_stopped [exec.set.stopped] but it may still complete with one of the other completion -signals instead if the operation became ready otherwise. -(2.3) — Starting an operation state [exec.opstate.start] may complete the operation immediately from the starting -thread if it is ready to be completed. Otherwise, the operation is initiated using the scheduler and gets -completed once it becomes ready. -(2.4) — Any object referenced in the sender call needs to stay valid while the sender or an operation state obtained -from the sender by connecting it to a receiver is used. A sender for a network operation stops being used -when it gets connected to a a receiver or when it gets destroyed. An operation state stops being used when -it is destroyed or when a completion signal is invoked after the operation state was execution::started -[exec.opstate.start]. -(2.5) — The customization points for the sender adaptors also provide an overload taking the arguments which -would come from the adapted sender directly. The behavior is as if execution::just [exec.just] with the -arguments is used as sender argument. -(2.6) — When a sender created by one of the operations taking a socket reference sock (however it is named) as argument is connected to a receiver r, it is an error if a scheduler obtained from get_scheduler(get_env(r)) -does not refer to the context used to create sock. [Note: In the past the specification for inconsistent use -typically made the behavior undefined. 
With the recent push towards removing undefined behavior it seems -prudent to avoid undefined behavior for reasonably detectable error. The misuse should be detectable. It -may be necessary to specify the actually error, though. – end note] -[Example: the following senders have the same effect when connected and started: -auto s0 = execution::just(buffer) | net::async_read_some(s); -auto s1 = net::async_read_some(execution::just(buffer), s); -auto s2 = net::async_read_some(s, buffer); -–End Example]. -9.2 Network Sender Operations Synopsis [net.sender.syn] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_accept_t; // [net.sender.async.accept] -struct async_connect_t; // [net.sender.async.connect] -struct async_read_some_t; // [net.sender.async.read.some] -struct async_read_t; // [net.sender.async.read] -struct async_receive_from_t; // [net.sender.async.receive.from] -struct async_receive_t; // [net.sender.async.receive] -struct async_resolve_address_t; // [net.sender.async.resolve.address] -struct async_resolve_name_t; // [net.sender.async.resolve.name] -struct async_resume_after_t; // [net.sender.async.resume.after] -struct async_resume_at_t; // [net.sender.async.resume.at] -struct async_send_t; // [net.sender.async.send] -struct async_send_to_t; // [net.sender.async.send.to] -struct async_wait_t; // [net.sender.async.wait] -struct async_write_some_t; // [net.sender.async.write.some] -14 -struct async_write_t; // [net.sender.async.write] -struct timeout_t; // [net.sender.async.timeout] -struct when_any_t; // [net.sender.async.when.any] -} -using sender-adaptors::async_accept_t; -using sender-adaptors::async_connect_t; -using sender-adaptors::async_read_some_t; -using sender-adaptors::async_read_t; -using sender-adaptors::async_receive_from_t; -using sender-adaptors::async_receive_t; -using sender-adaptors::async_resolve_address_t; -using sender-adaptors::async_resolve_name_t; -using 
sender-adaptors::async_resume_after_t; -using sender-adaptors::async_resume_at_t; -using sender-adaptors::async_send_t; -using sender-adaptors::async_send_to_t; -using sender-adaptors::async_wait_t; -using sender-adaptors::async_write_some_t; -using sender-adaptors::async_write_t; -using sender-adaptors::timeout_t; -using sender-adaptors::when_any_t; -inline constexpr async_accept_t async_accept{}; -inline constexpr async_connect_t async_connect{}; -inline constexpr async_read_some_t async_read_some{}; -inline constexpr async_read_t async_read{}; -inline constexpr async_receive_from_t async_receive_from{}; -inline constexpr async_receive_t async_receive{}; -inline constexpr async_resolve_address_t async_resolve_address{}; -inline constexpr async_resolve_name_t async_resolve_name{}; -inline constexpr async_resume_after_t async_resume_after{}; -inline constexpr async_resume_at_t async_resume_at{}; -inline constexpr async_send_t async_send{}; -inline constexpr async_send_to_t async_send_to{}; -inline constexpr async_wait_t async_wait{}; -inline constexpr async_write_some_t async_write_some{}; -inline constexpr async_write_t async_write{}; -inline constexpr timeout_t timeout{}; -inline constexpr when_any_t when_any{}; -} -9.3 Network Sender Operations [net.sender.operations] -[Note: The shape of the exact operations isn’t quite clear yet, as there are various design option (see above). -Below the relevant operations are listed together with their arguments and their likely completion_signatures. -The current list of operations may be incomplete. The “as if” code isn’t necessarily ready to compile and may -omit necessary casts and use private operations in some cases. 
– end note] -9.3.1 net::async_accept [net.sender.async.accept] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -15 -struct async_accept_t { -struct accept-sender; // exposition only -template -accept-sender operator()(basic_socket_acceptor& acceptor) const; -template Sender, -class AcceptableProtocol> -accept-sender operator()(Sender&& sender, -basic_socket_acceptor& acceptor) const; -}; -} -} -1 async_accept_t is the type of customization point objects for creating senders for accepting new socket connections. -template -accept-sender -async_accept_t::operator()(basic_socket_acceptor& acceptor) const -2 Returns: (*this)(execution::just(), acceptor); -template Sender, -class AcceptableProtocol> -accept-sender -async_accept_t::operator()(Sender&& sender, -basic_socket_acceptor& acceptor) const; -3 The operator creates an accept-sender. After the returned accept-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. If sender completes with a set_error or -set_stopped completion, the completion is forwarded to the connected receiver. -4 Let s be the accept-sender returned from async_accept_t()(sender, acceptor). Let Acceptor be the type -basic_socket_acceptor and env be an environment object. 
The completion signatures -returned from execution::get_completion_signatures(s, env) contain the following elements: -(4.1) — execution::set_value_t(typename Acceptor::socket_type, typename Acceptor::endpoint_type) -(4.2) — execution::set_error_t(error_code) -(4.3) — execution::set_stopped_t() -(4.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -5 When s is connected to a receiver r and the resulting operation state is started, it initiates an asynchronous -operation to extract a socket from the queue of pending connections for acceptor when there is at least one -pending connection, as if by POSIX: -typename Acceptor::endpoint_type ep; -socklen_t addrlen(ep.capacity()); -auto h = accept(acceptor.native_handle(), ep.data(), &addrlen); -if (h < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -ep.resize(addrlen); -execution::set_value(std::move(r), typename Acceptor::socket_type(h), ep); -} -6 [Note: The Networking TS passes the endpoint optionally as a reference argument, resulting in two interfaces: -16 -one where the endpoint isn’t obtained and one where it is. The interface for async_accept could reference -the endpoint as well, instead of providing it with set_value. An alternative design allowing omission of the -endpoint is to have async_accept not providing an endpoint with set_value and async_accept_from with -the completion signature above. 
_–End Note__] -9.3.2 net::async_connect [net.sender.async.connect] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_connect_t { -struct connect-sender; // exposition only -struct connect-sender-adaptor-closure; // exposition only -template -connect-sender operator()(basic_stream_socket& socket, -const typename basic_stream_socket::endpoint_type& ep) const; -template -connect-sender-adaptor-closure -operator()(basic_stream_socket& socket) const; -template ::endpoint_type> Sender> -connect-sender operator()(Sender&& sender, -basic_stream_socket& socket) const; -}; -} -} -1 async_connect_t is the type of customization point objects for creating senders connecting a stream socket to -a peer. -template -connect-sender -async_connect_t::operator()(basic_stream_socket& socket, -typename basic_stream_socket::endpoint_type ep) const; -2 Returns: (*this)(execution::just(ep), socket); -template -connect-sender-adaptor-closure -async_connect_t::operator()(basic_stream_socket& socket) const; -3 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template -connect-sender -async_connect_t::operator()(Sender&& sender, -basic_stream_socket& socket) const; -4 The operator creates a connect-sender. After the returned connect-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. It uses the endpoint_type argument to set_value -as the address to connect to. If sender completes with a set_error or set_stopped completion, the completion -is forwarded to the connected receiver. -5 Let s be the connect-sender returned from async_connect_t()(sender, socket) and env be an environment -object. 
The completion signatures returned from execution::get_completion_signatures(s, env) contain -17 -the following elements: -(5.1) — execution::set_value_t() -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument ep, it initiates an asynchronous operation to connect to a peer, as if by POSIX: -if (connect(socket.native_handle(), ep.data(), ep.size()) < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -execution::set_value(std::move(r)); -} -9.3.3 net::async_read_some [net.sender.async.read.some] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_read_some_t { -struct read-some-sender; // exposition only -struct read-some-sender-adaptor-closure; // exposition only -template -read-some-sender operator()(basic_stream_socket& socket, -const MutableBufferSequence& buffers) const; -template -read-some-sender-adaptor-closure -operator()(basic_stream_socket& socket) const; -template -read-some-sender operator()(Sender&& sender, -basic_stream_socket& socket) const; -}; -} -} -1 async_read_some_t is the type of customization point objects for creating senders reading a sequence of buffers -from a stream socket. -template -read-some-sender -async_read_some_t::operator()(basic_stream_socket& socket, -const MutableBufferSequence& buffers) const; -2 Returns: (*this)(execution::just(buffers), socket); -template -read-some-sender-adaptor-closure -async_read_some_t::operator()(basic_stream_socket& socket) const; -3 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). 
-template -read-some-sender -18 -async_read_some_t::operator()(Sender&& sender, -basic_stream_socket& socket) const; -4 Returns: async_receive(sender, socket); -5 [Note: The customization point may provide additional overloads, e.g., for files. As a result this name may be -useful for algorithms applicable to other streams then just sockets in the future. _ – end note_] -9.3.4 net::async_receive [net.sender.async.receive] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_receive_t { -struct receive-sender; // exposition only -struct receive-sender-adaptor-closure; // exposition only -template -receive-sender operator()(basic_socket& socket, -const MutableBufferSequence& buffers) const; -template -receive-sender operator()(basic_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers) const; -template -receive-sender-adaptor-closure -operator()(basic_socket& socket) const; -template -receive-sender operator()(Sender&& sender, -basic_socket& socket) const; -}; -} -} -1 async_receive_t is the type of customization point objects for creating senders receiving messages on a connected socket. -template -receive-sender -async_receive_t::operator()(basic_socket& socket, -const MutableBufferSequence& buffers) const; -2 Returns: (*this)(execution::just(buffers), socket); -template -receive-sender -async_receive_t::operator()(basic_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers) const; -3 Returns: (*this)(execution::just(flags, buffers), socket); -template -receive-sender-adaptor-closure -async_receive_t::perator()(basic_socket& socket) const; -19 -4 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template -receive-sender -async_receive_t::operator()(Sender&& sender, -basic_socket& socket) const; -5 The operator creates a receive-sender. 
After the returned receive-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. It uses the flags and buffers arguments to -set_value to specify how to receive data. If the sender_base::message_flags parameter is not present it -uses a flags variable initialized to socket_base::message_flags(). If sender completes with a set_error or -set_stopped completion, the completion is forwarded to the connected receiver. -6 Let s be the receive-sender returned from async_receive_t()(sender, socket) and env be an environment -object. The completion signatures returned from execution::get_completion_signatures(s, env) contain -the following elements: -(5.1) — execution::set_value_t() -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument buffers or arguments flags and buffers, it initiates an asynchronous operation to receive data. 
The -operation constructs and array iov of POSIX struct iovec of size length corresponding to buffers and reads -data as if by POSIX: -msghdr message; -message.msg_name = nullptr; -message.msg_namelen = 0; -message.msg_iov = iov; -message.msg_iovlen = length; -message.msg_control = nullptr; -message.msg_controllen = 0; -message.msg_flags = 0; -auto n = recvmsg(socket.native_handle(), &message, static_cast(flags)); -if (n < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -execution::set_value(std::move(r), n); -} -9.3.5 net::async_receive_from [net.sender.async.receive.from] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_receive_from_t { -struct receive-from-sender; // exposition only -struct receive-from-sender-adaptor-closure; // exposition only -template -receive-from-sender operator()(basic_datagram_socket& socket, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -template -20 -receive-from-sender operator()(basic_datagram_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -template -receive-from-sender-adaptor-closure -operator()(basic_datagram_socket& socket) const; -template -receive-from-sender operator()(Sender&& sender, -basic_datagram_socket& socket) const; -}; -} -} -1 async_receive_from_t is the type of customization point objects for creating senders receiving messages on a -socket from a specified endpoint. 
-template -receive-from-sender -async_receive_from_t::operator()(basic_datagram_socket& socket, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -2 Returns: (*this)(execution::just(buffers, ep), socket); -template -receive-from-sender -async_receive_from_t::operator()(basic_datagram_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -3 Returns: (*this)(execution::just(flags, buffers, ep), socket); -template -receive-from-sender-adaptor-closure -async_receive_from_t::operator()(basic_datagram_socket& socket) const; -4 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template -receive-from-sender -async_receive_from_t::operator()(Sender&& sender, -basic_datagram_socket& socket) const; -5 The operator creates a receive-from-sender. After the returned receive-from-sender is connect()ed it uses the -set_value completion of sender to trigger the start of its own operation. It uses the flags, buffers, and -ep arguments to set_value to specify how to receive data. If the sender_base::message_flags parameter is -not present it uses a flags variable initialized to socket_base::message_flags(). If sender completes with -a set_error or set_stopped completion, the completion is forwarded to the connected receiver. -6 Let s be the receive-from-sender returned from async_receive_from_t()(sender, socket) and env be an environment object. 
The completion signatures returned from execution::get_completion_signatures(s, env) -contain the following elements: -(5.1) — execution::set_value_t() -21 -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument buffers and ep or arguments flags, buffers, and ep, it initiates an asynchronous operation to -receive data. The operation constructs and array iov of POSIX struct iovec of size length corresponding to -buffers and receives data as if by POSIX: -typename Socket::endpoint_type addr; -socklen_t addrlen{ep.capacity()}; -msghdr message; -message.msg_name = addr.data(); -message.msg_namelen = &addrlen; -message.msg_iov = iov; -message.msg_iovlen = length; -message.msg_control = nullptr; -message.msg_controllen = 0; -message.msg_flags = 0; -auto n = recvmsg(socket.native_handle(), &message, static_cast(flags)); -if (n < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -addr.resize(addrlen); -execution::set_value(std::move(r), n, addr); -} -9.3.6 net::async_send [net.sender.async.send] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_send_t { -struct send-sender; // exposition only -struct send-sender-adaptor-closure; // exposition only -template -send-sender operator()(basic_socket& socket, -const ConstantBufferSequence& buffers) const; -template -send-sender operator()(basic_socket& socket, -sender_base::message_flags flags, -const ConstantBufferSequence& buffers) const; -template -send-sender-adaptor-closure -operator()(basic_socket& socket) const; -template -send-sender operator()(Sender&& sender, -basic_socket& socket) const; -}; -} -} -22 -1 async_send_t is the type of customization point objects for 
creating senders sending messages on a connected -socket. -template -send-sender -async_send_t::operator()(basic_socket& socket, -const ConstantBufferSequence& buffers) const; -2 Returns: (*this)(execution::just(buffers), socket); -template -send-sender -async_send_t::operator()(basic_socket& socket, -sender_base::message_flags flags, -const ConstantBufferSequence& buffers) const; -3 Returns: (*this)(execution::just(flags, buffers), socket); -template -send-sender-adaptor-closure -async_send_t::perator()(basic_socket& socket) const; -4 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template -send-sender -async_send_t::operator()(Sender&& sender, -basic_socket& socket) const; -5 The operator creates a send-sender. After the returned send-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. It uses the flags and buffers arguments to -set_value to specify how to send data. If the sender_base::message_flags parameter is not present it uses -a flags variable initialized to socket_base::message_flags(). If sender completes with a set_error or -set_stopped completion, the completion is forwarded to the connected receiver. -6 Let s be the send-sender returned from async_send_t()(sender, socket) and env be an environment object. The completion signatures returned from execution::get_completion_signatures(s, env) contain the -following elements: -(5.1) — execution::set_value_t() -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument buffers or arguments flags and buffers, it initiates an asynchronous operation to send data. 
The -operation constructs and array iov of POSIX struct iovec of size length corresponding to buffers and sends -data as if by POSIX: -msghdr message; -message.msg_name = nullptr; -message.msg_namelen = 0; -message.msg_iov = iov; -message.msg_iovlen = length; -message.msg_control = nullptr; -message.msg_controllen = 0; -message.msg_flags = 0; -auto n = sendmsg(socket.native_handle(), &message, static_cast(flags)); -23 -if (n < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -execution::set_value(std::move(r), n); -} -9.3.7 net::async_send_to [net.sender.async.send.to] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_send_to_t { -struct send-to-sender; // exposition only -struct send-to-sender-adaptor-closure; // exposition only -template -send-to-sender operator()(basic_datagram_socket& socket, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -template -send-to-sender operator()(basic_datagram_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -template -send-to-sender-adaptor-closure -operator()(basic_datagram_socket& socket) const; -template -send-to-sender operator()(Sender&& sender, -basic_datagram_socket& socket) const; -}; -} -} -1 async_send_to_t is the type of customization point objects for creating senders sending messages on a socket -to a specified endpoint. 
-template -send-to-sender -async_send_to_t::operator()(basic_datagram_socket& socket, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -2 Returns: (*this)(execution::just(buffers, ep), socket); -template -send-to-sender -async_send_to_t::operator()(basic_datagram_socket& socket, -sender_base::message_flags flags, -const MutableBufferSequence& buffers, -typename basic_datagram_socket::endpoint_type& ep) const; -3 Returns: (*this)(execution::just(flags, buffers, ep), socket); -24 -template -send-to-sender-adaptor-closure -async_send_to_t::operator()(basic_datagram_socket& socket) const; -4 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template -send-to-sender -async_send_to_t::operator()(Sender&& sender, -basic_datagram_socket& socket) const; -5 The operator creates a send-to-sender. After the returned send-to-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. It uses the flags, buffers, and ep arguments -to set_value to specify how to send data. If the sender_base::message_flags parameter is not present it -uses a flags variable initialized to socket_base::message_flags(). If sender completes with a set_error or -set_stopped completion, the completion is forwarded to the connected receiver. -6 Let s be the send-to-sender returned from async_send_to_t()(sender, socket) and env be an environment -object. 
The completion signatures returned from execution::get_completion_signatures(s, env) contain -the following elements: -(5.1) — execution::set_value_t() -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument buffers and ep or arguments flags, buffers, and ep, it initiates an asynchronous operation to send -data. The operation constructs and array iov of POSIX struct iovec of size length corresponding to buffers -and receives data as if by POSIX: -msghdr message; -message.msg_name = ep.data(); -message.msg_namelen = ep.size(); -message.msg_iov = iov; -message.msg_iovlen = length; -message.msg_control = nullptr; -message.msg_controllen = 0; -message.msg_flags = 0; -auto n = sendmsg(socket.native_handle(), &message, static_cast(flags)); -if (n < 0) { -execution::set_error(std::move(r), error_code(errno, system_category())); -} -else { -execution::set_value(std::move(r), n); -} -9.3.8 net::async_wait [net.sender.async.wait] -namespace std::experimental::net::inline v1 { -namespace sender-adaptors // exposition only { -struct async_wait_t { -struct wait-sender; // exposition only -struct wait-sender-adaptor-closure; // exposition only -25 -template -wait-sender operator()(basic_socket& socket, -socket_base::wait_type events) const; -template -wait-sender-adaptor-closure operator()(basic_socket& socket) const; -template Sender, -class Protocol> -wait-sender operator()(Sender&& sender, -basic_socket& socket) const; -}; -} -} -1 async_wait_t is the type of customization point objects for creating senders awaiting readiness of sockets for a -specific set of operations. 
[Note: This operation could be seen as the basis operation for readiness-based contexts -as all the operations can be implemented in terms of this operation and conceptually blocking calls which are -known not to block due to the readiness indicator. – end note] -template -wait-sender -async_wait_t::operator()(basic_socket& socket, -socket_base::wait_type events) const -2 Returns: (*this)(execution::just(events), socket); -template -wait-sender-adaptor-closure -async_wait_t::operator()(basic_socket& socket) const; -3 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket). -template Sender, -class Protocol> -wait-sender -async_wait_t::operator()(Sender&& sender, -basic_socket& socket) const; -4 The operator creates a wait-sender. After the returned wait-sender is connect()ed it uses the set_value -completion of sender to trigger the start of its own operation. It uses the socket_base::wait_type argument -to set_value as a bitmask to await readiness of the respective conditions. If sender completes with a set_error -or set_stopped completion, the completion is forwarded to the connected receiver. -5 Let s be the wait-sender returned from async_wait_t()(sender, socket) and env be an environment object. The completion signatures returned from execution::get_completion_signatures(s, env) contain the -following elements: -(5.1) — execution::set_value_t(socket_base::wait_type) -(5.2) — execution::set_error_t(error_code) -(5.3) — execution::set_stopped_t() -(5.4) — Any additional execution::set_error_t signature from execution::get_completion_signatures(sender, env) -6 When s is connected to a receiver r and the resulting operation state is started by a call to set_value with -argument events, it initiates an asynchronous operation to await readiness of I/O operations on s, as if by -POSIX: -26 -pollfds fds[1]; -fds.fd = s.native_handle(); -fds.events = (events & s.wait_read? POLLIN: 0) | (events & s.wait_write? 
POLLOUT: 0);
-if (1 == poll(fds, 1, 0)) {
-execution::set_value(std::move(r),
-(fds[0].revents & POLLIN? s.wait_read(): socket_base::wait_type()) |
-(fds[0].revents & POLLOUT? s.wait_write(): socket_base::wait_type()) |
-(fds[0].revents & POLLERR? s.wait_error(): socket_base::wait_type()));
-}
-else {
-execution::set_error(std::move(r), error_code(errno, system_category()));
-}
-9.3.9 net::async_write_some [net.sender.async.write.some]
-namespace std::experimental::net::inline v1 {
-namespace sender-adaptors // exposition only {
-struct async_write_some_t {
-struct write-some-sender; // exposition only
-struct write-some-sender-adaptor-closure; // exposition only
-template
-write-some-sender operator()(basic_stream_socket& socket,
-const ConstBufferSequence& buffers) const;
-template
-write-some-sender-adaptor-closure
-operator()(basic_stream_socket& socket) const;
-template
-write-some-sender operator()(Sender&& sender,
-basic_stream_socket& socket) const;
-};
-}
-}
-1 async_write_some_t is the type of customization point objects for creating senders writing a sequence of buffers
-to a stream socket.
-template
-write-some-sender
-async_write_some_t::operator()(basic_stream_socket& socket,
-const ConstBufferSequence& buffers) const;
-2 Returns: (*this)(execution::just(buffers), socket);
-template
-write-some-sender-adaptor-closure
-async_write_some_t::operator()(basic_stream_socket& socket) const;
-3 Returns: an object closure such that sender | closure yields an object equivalent to (*this)(sender, socket).
-template
-write-some-sender
-async_write_some_t::operator()(Sender&& sender,
-basic_stream_socket& socket) const;
-27
-4 Returns: async_send(sender, socket);
-5 [Note: The customization point may provide additional overloads, e.g., for files. As a result this name may be
-useful for algorithms applicable to other streams than just sockets in the future. 
_ – end note_]
-9.4 Network Algorithms [net.algorithms]
-9.4.1 General [net.algorithms.general]
-1 The section [net.algorithms] specifies algorithms relevant for networking programming. Whether the corresponding operations are, indeed, implemented as algorithms using lower level networking operations or something
-different is left unspecified although implementations are encouraged to do so.
-9.4.2 net::async_resolve_name [net.sender.async.resolve.name]
-1 async_resolve_name is a customization point for resolving a name to an address.
-2 async_resolve_name is parameterized on an InternetProtocol and takes a sequence of characters specifying a
-name to be looked-up as argument. Upon success it returns a net::ip::basic_endpoint
-with a result from the look-up.
-3 The completion signatures returned from execution::get_completion_signatures(s, env) contain
-three elements where s is a sender object returned from net::async_resolve_name:
-(3.1) — execution::set_value(net::ip::basic_endpoint)
-(3.2) — execution::set_error_t(error_code)
-(3.3) — execution::set_stopped_t()
-4 The default implementation uses DNS (as specified by a sequence of RFCs) to resolve the name into an address.
-How the operation is done exactly is implementation specific.
-5 [Editing note: There should probably be a version of the operation yielding a sequence of results. Most likely the
-proper approach to do so is to use sequence senders which are, however, not yet in any proposal being discussed.
-– end note]
-9.4.3 net::async_resolve_address [net.sender.async.resolve.address]
-1 async_resolve_address is a customization point for resolving an address to a name.
-2 async_resolve_address takes a net::ip::basic_endpoint as argument. Upon success it
-returns a string_view with a name for the endpoint. [Note: The referenced sequence of characters is only valid
-until set_value returns – end note]
-3 Let s be a sender object returned from async_resolve_address. 
The completion signatures returned from -execution::get_completion_signatures(s, env) contain three elements: -(3.1) — execution::set_value(string_view) -(3.2) — execution::set_error_t(error_code) -(3.3) — execution::set_stopped_t() -4 The default implementation uses DNS (as specified by a sequence of RFCs) to resolve the address into a name. -How the operation is done exactly is implementation specific. -5 [Editing Note: There should probably a version of the operation yielding a sequence of results. Most likely the -proper approach to do is to use sequence senders which are, however, not yet in a any proposal being discussed. -– end note] -28 \ No newline at end of file diff --git a/context/research/p3552.md b/context/research/p3552.md deleted file mode 100644 index 19201f6..0000000 --- a/context/research/p3552.md +++ /dev/null @@ -1,928 +0,0 @@ -Add a Coroutine Task Type -Document #: P3552R3 [Latest] [Status] -Date: 2025-06-20 -Project: Programming Language C++ -Audience: Concurrency Working Group (SG1) -Library Evolution Working Group (LEWG) -Library Working Group (LWG) -Reply-to: Dietmar Kühl (Bloomberg) - -Maikel Nadolski - -C++20 added support for coroutines that can improve the experience writing asynchronous code. C++26 added the sender/receiver model for a general interface to asynchronous operations. The expectation is that users would use the framework using some coroutine type. To support that, a suitable class needs to be defined and this proposal is providing such a definition. 
- -Just to get an idea what this proposal is about: here is a simple Hello, world written using the proposed coroutine type: - -#include -#include -#include - -namespace ex = std::execution; - -int main() { - return std::get<0>(*ex::sync_wait([]->ex::task { - std::cout << "Hello, world!\n"; - co_return co_await ex::just(0); - }())); -} -1 Change History -1.1 R0 Initial Revision -1.2 R1 Hagenberg Feedback -Changed the name from lazy to task based on SG1 feedback and dropped the section on why lazy was chosen. -Changed the name of any_scheduler to task_scheduler. -Added wording for the task specification. -1.3 R2 LEWG Feedback -Removed the use decay_t from the specification. -Changed the wording to avoid exception_ptr when unavailable. -Renamed Context to Environment to better reflect the argument’s use. -Made exposition-only “macros” use italics. -Added a feature test macro. -Fixed some typos. -1.4 R3 LWG Feedback -2 Prior Work -This proposal isn’t the first to propose a coroutine type. Prior proposals didn’t see any recent (post introduction of sender/receiver) update, although corresponding proposals were discussed informally on multiple occasions. There are also implementations of coroutine types based on a sender/receiver model in active use. This section provides an overview of this prior work, and where relevant, of corresponding discussions. This section is primarily for motivating requirements and describing some points in the design space. - -2.1 P1056: Add lazy coroutine (coroutine task) type -The paper describes a task/lazy type (in P1056r0 the name was task; the primary change for P1056r1 is changing the name to lazy). The fundamental idea is to have a coroutine type which can be co_awaited: the interface of lazy consists of move constructor, deliberately no move assignment, a destructor, and operator co_await(). 
The proposals don’t go into much detail on how to eventually use a coroutine, but it mentions that there could be functions like sync_await(task) to await completion of a task (similar to execution::sync_wait(sender)) or a few variations of that. - -A fair part of the paper argues why future.then() is not a good approach to model coroutines and their results. Using future requires allocation, synchronisation, reference counting, and scheduling which can all be avoided when using coroutines in a structured way. - -The paper also mentions support for symmetric transfer and allocator support. Both of these are details on how the coroutine is implemented. - -Discussion for P1056r0 in SG1 - -The task doesn’t really have anything to do with concurrency. -Decomposing a task cheaply is fundamental. The HALO Optimisations help. -The task isn’t move assignable because there are better approaches than using containers to hold them. It is move constructible as there are no issues with overwriting a potentially live task. -Resuming where things complete is unsafe but the task didn’t want to impose any overhead on everybody. -There can be more than one task type for different needs. -Holding a mutex lock while co_awaiting which may resume on a different thread is hazardous. Static analysers should be able to detect these cases. -Votes confirmed the no move assignment and forwarding to LEWG assuming the name is not task. -Votes against deal with associated executors and a request to have strong language about transfer between threads. -2.2 P2506: std::lazy: a coroutine for deferred execution -This paper is effectively restating what P1056 said with the primary change being more complete proposed wording. Although sender/receiver were discussed when the paper was written but std::execution hadn’t made it into the working paper, the proposal did not take a sender/receiver interface into account. 
-
-Although there were mails seemingly scheduling a discussion in LEWG, we didn’t manage to actually locate any discussion notes.
-
-2.3 cppcoro
-This library contains multiple coroutine types, algorithms, and some facilities for asynchronous work. For the purpose of this discussion only the task types are of interest. There are two task types cppcoro::task and cppcoro::shared_task. The key difference between task and shared_task is that the latter can be copied and awaited by multiple other coroutines. As a result shared_task always produces an lvalue and may have slightly higher costs due to the need to maintain a reference count.
-
-The types and algorithms are pre-sender/receiver and operate entirely in terms of awaiters/awaitables. The interface of both task types is a bit richer than that from P1056/P2506. Below t is either a cppcoro::task or a cppcoro::shared_task:
-
-The task objects can be move constructed and move assigned; shared_task object can also be copy constructed and copy assigned.
-Using t.is_ready() it can be queried if t has completed.
-Using co_await t awaits completion of t, yielding the result. The result may be throwing an exception if the coroutine completed by throwing.
-Using co_await t.when_ready() allows synchronising with the completion of t without actually getting the result. This form of synchronisation won’t throw any exception.
-cppcoro::shared_task also supports equality comparisons.
-In both cases, the task starts suspended and is resumed when it is co_awaited. This way a continuation is known when the task is resumed, which is similar to start(op)ing an operation state op. The coroutine body needs to use co_await or co_return. co_await expects an awaitable or an awaiter as argument. Using co_yield is not supported. The implementation supports symmetric transfer but doesn’t mention allocators.
-
-The shared_task is similar to split(sender): in both cases, the same result is produced for multiple consumers. 
Correspondingly, there isn’t a need to support a separate shared_task in a sender/receiver world. Likewise, throwing of results can be avoid by suitably rewriting the result of the set_error channel avoiding the need for an operation akin to when_ready(). - -2.4 libunifex -unifex is an earlier implementation of the sender/receiver ideas. Compared to std::execution it is lacking some of the flexibilities. For example, it doesn’t have a concept of environments or domains. However, the fundamental idea of three completion channels for success, failure, and cancellation and the general shape of how these are used is present (even using the same names for set_value and set_error; the equivalent of set_stopped is called set_done). unifex is in production use in multiple places. The implementation includes a unifex::task. - -As unifex is sender/receiver-based, its unifex::task is implemented such that co_await can deal with senders in addition to awaitables or awaiters. Also, unifex::task is scheduler affine: the coroutine code resumes on the same scheduler even if a sender completed on a different scheduler. The task’s scheduler is taken from the receiver it is connected to. The exception for rescheduling on the task’s scheduler is explicitly awaiting the result of schedule(sched) for some scheduler sched: the operation changes the task’s scheduler to be sched. The relevant treatment is in the promise type’s await_transform(): - -If a sender sndr which is the result of schedule(sched) is co_awaited, the corresponding sched is installed as the task’s scheduler and the task resumes on the context completing sndr. Feedback from people working with unifex suggests that this choice for changing the scheduler is too subtle. While it is considered important to be able to explicitly change the scheduler a task executes on, doing so should be more explicit. 
-For both senders and awaiters being awaited, the coroutine will be resumed on the task’s current scheduler when the task is scheduler affine. In general that is done by continuing with the senders result on the task’s scheduler, similar to continues_on(sender, scheduler). The rescheduling is avoided when the sender is tagged as not changing scheduler (using a static constexpr member named blocking which is initialized to blocking_kind::always_inline). -If a sender is co_awaited it gets connected to a receiver provided by the task to form an awaiter holding an operation state. The operation state gets started by the awaiter’s await_suspend. The receiver arranges for a set_value completion to become a value returned from await_resume, a set_error completion to become an exception, and a set_done completion to resume a special “on done” coroutine handle rather than resuming the task itself effectively behaving like an uncatchable exception (all relevant state is properly destroyed and the coroutine is never resumed). -When co_awaiting a sender sndr there can be at most one set_value completion: if there are more than one set_value completions the promise type’s await_transform will just return sndr and the result cannot be co_awaited (unless it is also given an awaitable interface). The result type of co_await sndr depends on the number of arguments to set_value: - -If there are no arguments for set_value then the type of co_await sndr will be void. -If there is exactly one argument of type T for set_value then the type of co_await sndr will be T. -If there are more than one arguments for set_value then the type of co_await sndr will be std::tuple with the corresponding argument types. -If a receiver doesn’t have a scheduler, it can’t be connect()ed to a unifex::task. In particular, when using a unifex::async_scope scope it isn’t possible to directly call scope.spawn(task) with a unifex::task task as the unifex::async_scope doesn’t provide a scheduler. 
The unifex::async_scope provides a few variations of spawn() which take a scheduler as argument. - -unifex provides some sender algorithms to transform the sender result into something which may be more suitable to be co_awaited. For example, unifex::done_as_optional(sender) turns a successful completion for a type T into an std::optional and the cancellation completion set_done into a set_value completion with a disengaged std::optional. - -The unifex::task is itself a sender and can be used correspondingly. To deal with scheduler affinity a type erased scheduler unifex::any_scheduler is used. - -The unifex::task doesn’t have allocator support. When creating a task multiple objects are allocated on the heap: it seems there is a total of 6 allocations for each unifex::task being created. After that, it seems the different co_awaits don’t use a separate allocation. - -The unifex::task doesn’t directly guard against stack overflow. Due to rescheduling continuations on a scheduler when the completion isn’t always inline, the issue only arises when co_awaiting many senders with blocking_kind::always_inline or when the scheduler resumes inline. - -2.5 stdexec -The exec::task in stdexec is somewhat similar to the unifex task with some choices being different, though: - -The exec::task is also scheduler affine. The chosen scheduler is unconditionally used for every co_await, i.e., there is no attempt made to avoid scheduling, e.g., when the co_awaited sender completes inline. -Unlike unifex, it is OK if the receiver’s environment doesn’t provide a scheduler. In that case an inline scheduler is used. If an inline scheduler is used there is the possibility of stack overflow. -It is possible to co_await just_error(e) and co_await just_stopped(), i.e., the sender isn’t required to have a set_value_t completion. -The exec::task also provides a context C. An object of this type becomes the environment for receivers connect()ed to co_awaited senders. 
The default context provides access to the task’s scheduler. In addition an in_place_stop_token is provided which forwards the stop requests from the environment of the receiver which is connected to the task.
-
-Like the unifex task exec::task doesn’t provide any allocator support. When creating a task there are two allocations.
-
-3 Objectives
-Also see sender/receiver issue 241.
-
-Based on the prior work and discussions around corresponding coroutine support there are a number of required or desired features (listed in no particular order):
-
-A coroutine task needs to be awaiter/awaitable friendly, i.e., it should be possible to co_await awaitables which includes both library provided and user provided ones. While that seems obvious, it is possible to create an await_transform which is deleted for awaiters and that should be prohibited.
-
-When composing sender algorithms without using a coroutine it is common to adapt the results using suitable algorithms and the completions for sender algorithms are designed accordingly. On the other hand, when awaiting senders in a coroutine it may be considered annoying having to transform the result into a shape which is friendly to a coroutine use. Thus, it may be reasonable to support rewriting certain shapes of completion signatures into something different to make the use of senders easier in a coroutine task. See the section on the result type for co_await for a discussion.
-
-A coroutine task needs to be sender friendly: it is expected that asynchronous code is often written using coroutines awaiting senders. However, depending on how senders are treated by a coroutine some senders may not be awaitable. For example neither unifex nor stdexec support co_awaiting senders with more than one set_value completion. 
- -It is possibly confusing and problematic if coroutines resume on a different execution context than the one they were suspended on: the textual similarity to normal functions makes it look as if things are executed sequentially. Experience also indicates that continuing a coroutine on whatever context a co_awaited operation completes frequently leads to issues. Senders could, however, complete on an entirely different scheduler than where they started. When composing senders (not using coroutines) changing contexts is probably OK because it is done deliberately, e.g., using continues_on, and the way to express things is new with fewer attached expectations. - -To bring these two views together a coroutine task should be scheduler affine by default, i.e., it should normally resume on the same scheduler. There should probably also be an explicit way to opt out of scheduler affinity when the implications are well understood. - -Note that scheduler affinity does not mean that a task is always continuing on the same thread: a scheduler may refer to a thread pool and the task will continue on one of the threads (which also means that thread local storage cannot be used to propagate contexts implicitly; see the discussion on environments below). - -When using coroutines there will probably be an allocation at least for the coroutine frame (the HALO optimisations can’t always work). To support the use in environments where memory allocations using new/delete aren’t supported the coroutine task should support allocations using allocators. - -Receivers have associated environments which can support an open set of queries. Normally, queries on an environment can be forwarded to the environment of a connect()ed receiver. Since the coroutine types are determined before the coroutine’s receiver is known and the queries themselves don’t specify a result type that isn’t possible when a coroutine provides a receiver to a sender in a co_await expression. 
It should still be possible to provide a user-customisable environment from the receiver used by co_await expressions. One aspect of this environment is to forward stop requests to co_awaited child operations. Another is possibly changing the scheduler to be used when a child operation queries get_scheduler from the receiver’s environment. Also, in non-asynchronous code it is quite common to pass some form of context implicitly using thread local storage. In an asynchronous world such contexts could be forwarded using the environment. - -The coroutine should be able to indicate that it was canceled, i.e., to get set_stopped() called on the task’s receiver. std::execution::with_awaitable_senders already provided this ability senders being co_awaited but that doesn’t necessarily extend to the coroutine implementation. - -Similar to indicating that a task got canceled it would be good if a task could indicate that an error occurred without throwing an exception which escapes from the coroutine. - -In general a task has to assume that an exception escapes the coroutine implementation. As a result, the task’s completion signatures need to include set_error_t(std::exception_ptr). If it can be indicated to the task that no exception will escape the coroutine, this completion signature can be avoided. - -When many co_awaited operations complete synchronously, there is a chance for stack overflow. It may be reasonable to have the implementation prevent stack overflow by using a suitable scheduler sometimes. - -In some situations it can be useful to somehow schedule an asynchronous clean-up operation which is triggered upon coroutine exit. See the section on asynchronous clean-up below for more discussing - -The task coroutine provided by the standard library may not always fit user’s needs although they may need/want various of the facilities. 
To avoid having users implement all functionality from scratch task should use specified components which can be used by users when building their own coroutine. The components as_awaitable and with_awaitable_sender are two parts of achieving this objective but there are likely others. - -The algorithm std::execution::as_awaitable does turn a sender into an awaitable and is expected to be used by custom written coroutines. Likewise, it is intended that custom coroutines use the CRTP class template std::execution::with_awaitable_senders. It may be reasonable to adjust the functionality of these components instead of defining the functionality specific to a task<...> coroutine task. - -It is important to note that different coroutine task implementations can live side by side: not all functionality has to be implemented by the same coroutine task. The objective for this proposal is to select a set of features which provides a coroutine task suitable for most uses. It may also be reasonable to provide some variations as different names. A future revision of the standard or third party libraries can also provide additional variations. - -4 Design -This section discusses various design options for achieving the listed objectives. Most of the designs are independent of each other and can be left out if the consensus is that it shouldn’t be used for whatever reason. - -4.1 Template Declaration for task -Coroutines can use co_return to produce a value. The value returned can reasonably provide the argument for the set_value_t completion of the coroutines. As the type of a coroutine is defined even before the coroutine body is given, there is no way to deduce the result type. The result type is probably the primary customisation and should be the first template parameter which gets defaulted to void for coroutines not producing any value. 
For example: - -int main() { - ex::sync_wait([]->ex::task<>{ - int result = co_await []->ex::task { co_return 42; }(); - assert(result == 42); - }()); -} -The inner coroutines completes with set_value_t(int) which gets translated to the value returned from co_await (see co_await result type below for more details). The outer coroutine completes with set_value_t(). - -Beyond the result type there are a number of features for a coroutine task which benefit from customisation or for which it may be desirable to disable them because they introduce a cost. As many template parameters become unwieldy, it makes sense to combine these into a [defaulted] context parameter. The aspects which benefit from customisation are at least: - -Customising the environment for child operations. The context itself can actually become part of the environment. -Disable scheduler affinity and/or configure the strategy for obtaining the coroutine’s scheduler. -Configure allocator awareness. -Indicate that the coroutine should be noexcept. -Define additional error types. -The default context should be used such that any empty type provides the default behaviour instead of requiring a lot of boilerplate just to configure a particular aspect. For example, it should be possible to selectively enable allocator support using something like this: - -struct allocator_aware_context { - using allocator_type = std::pmr::polymorphic_allocator; -}; -template -using my_task = ex::task; -Using various different types for task coroutines isn’t a problem as the corresponding objects normally don’t show up in containers. Tasks are mostly co_awaited by other tasks, used as child senders when composing work graphs, or maintained until completed using something like a counting_scope. When they are used in a container, e.g., to process data using a range of coroutines, they are likely to use the same result type and context types for configurations. 
- -4.2 task Completion Signatures -The discussion above established that task can have a successful completion using set_value_t(T). The coroutine completes accordingly when it is exited using a matching co_return. When T is void the coroutine also completes successfully using set_value() when floating off the end of the coroutine or when using a co_return without an expression. - -If a coroutine exits with an exception completing the corresponding operation with set_error(std::exception_ptr) is an obvious choice. Note that a co_await expression results in throwing an exception when the awaited operation completes with set_error(E) (see below), i.e., the coroutine itself doesn’t necessarily need to throw an exception itself. - -Finally, a co_await expression completing with set_stoppped() results in aborting the coroutine immediately (see below) and causing the coroutine itself to also complete with set_stopped(). - -The coroutine implementation cannot inspect the coroutine body to determine how the different asynchronous operations may complete. As a result, the default completion signatures for task are - -ex::completion_signatures< - ex::set_value_t(T), // or ex::set_value_t() if T == void - ex::set_error_t(std::exception_ptr), - ex:set_stopped_t() ->; -Support for reporting an error without exception may modify the completion signatures. - -4.3 task constructors and assignments -Coroutines are created via a factory function which returns the coroutine type and whose body uses one of the co_* function, e.g. - -task<> nothing(){ co_return; } -The actual object is created via the promise type’s get_return_object function and it is between the promise and coroutine types how that actually works: this constructor is an implementation detail. To be valid senders the coroutine type needs to be destructible and it needs to have a move constructor. 
Other than that, constructors and assignments either don’t make sense or enable dangerous practices: - -Copy constructor and copy assignment don’t make sense because there is no way to copy the actual coroutine state. - -Move assignment is rather questionable because it makes it easy to transport the coroutine away from referenced entities. - -Previous papers P1056 and P2506 also argued against a move assignment. However, one of the arguments doesn’t apply to the task proposed here: There is no need to deal with cancellation when assigning or destroying a task object. Upon start() of task the coroutine handle is transferred to an operation state and the original coroutine object doesn’t have any reference to the object anymore. - -If there is no assignment, a default constructed object doesn’t make much sense, i.e., task also doesn’t have a default constructor. - -Based on experience with Folly the suggestion was even stronger: task shouldn’t even have move construction! That would mean that task can’t be a sender or that there would need to be some internal interface enabling the necessary transfer. That direction isn’t pursued by this proposal. - -The lack of move assignment doesn’t mean that task can’t be held in a container: it is perfectly fine to push_back objects of this type into a container, e.g.: - -std::vector> cont; -cont.emplace_back([]->ex::task<> { co_return; }()); -cont.push_back([]->ex::task<> { co_return; }()); -The expectation is that most of the time coroutines don’t end up in normal containers. Instead, they’d be managed by a counting_scope or hold on to by objects in a work graph composed of senders. - -Technically there isn’t a problem adding a default constructor, move assignment, and a swap() function. Based on experience with similar components it seems task is better off not having them. - -4.4 Result Type For co_await -When co_awaiting a sender sndr in a coroutine, sndr needs to be transformed to an awaitable. 
The existing approach is to use execution::as_waitable(sndr) [exex.as.awaitable] in the promise type’s await_transform and task uses that approach. The awaitable returned from as_awaitable(sndr) has the following behaviour (rcvr is the receiver the sender sndr is connected to): - -When sndr completes with set_stopped(std::move(rcvr)) the function unhandled_stopped() on the promise type is called and the awaiting coroutine is never resumed. The unhandled_stopped() results in task itself also completing with set_stopped_t(). - -When sndr completes with set_error(std::move(rcvr), error) the coroutine is resumed and the co_await sndr expression results in error being thrown as an exceptions (with special treatment for std::error_code). - -When sndr completes with set_value(std::move(rcvr), a...) the expression co_await sndr produces a result corresponding the arguments to set_value: - -If the argument list is empty, the result of co_await sndr is void. -Otherwise, if the argument list contains exactly one element the result of co_await sndr is a.... -Otherwise, the result of co_await sndr is std::tuple(a...). -Note that the sender sndr is allowed to have no set_value_t completion signatures. In this case the result type of the awaitable returned from as_awaitable(sndr) is declared to be void but co_await sndr would never return normally: the only ways to complete without a set_value_t completion is to complete with set_stopped(std::move(rcvr) or with set_error(std::move(rcvr), error), i.e., the expression either results in the coroutine to be never resumed or an exception being thrown. 
- -Here is an example which summarises the different supported result types: - -task<> fun() { - co_await ex::just(); // void - auto v = co_await ex::just(0); // int - auto[i, b, c] = co_await ex::just(0, true, 'c'); // tuple - try { co_await ex::just_error(0); } catch (int) {} // exception - co_await ex::just_stopped(); // cancel: never resumed -} -The sender sndr can have at most one set_value_t completion signature: if there are more than one set_value_t completion signatures as_awaitable(sndr) is invalid and fails to compile: users who want to co_await a sender with more than one set_value_t completions need to use co_await into_variant(s) (or similar) to transform the completion signatures appropriately. It would be possible to move this transformation into as_awaitable(sndr). - -Using effectively into_variant(s) isn’t the only possible transformation if there are multiple set_value_t transformations. To avoid creating a fairly hard to use result object, as_awaitable(sndr) could detect certain usage patterns and rather create a result which is easier to use when being co_awaited. An example for this situation is the queue.async_pop() operation for concurrent queues: this operation can complete successfully in two ways: - -When an object was extracted the operation completes with set_value(std::move(rcvr), value). -When the queue was closed the operation completes with set_value(std::move(rcvr)). -Turning the result of queue.async_pop() into an awaitable using the current as_awaitable(queue.async_pop()) ([exec.as.awaitable]) fails because the function accepts only senders with at most one set_value_t completion. 
Thus, it is necessary to use something like the below: - -task<> pop_demo(auto& queue) { - // auto value = co_await queue.async_pop(); // doesn't work - std::optional v0 = co_await (queue.async_pop() | into_optional); - std::optional v1 = co_await into_optional(queue.async_pop()); -} -The algorithm into_optional(sndr) would determine that there is exactly one set_value_t completion with arguments and produce an std::optional if there is just one parameter of type T and produce a std::optional> if there are more than one parameter with types T.... It would be possible to apply this transformation when a corresponding set of completions is detected. The proposal optional variants in sender/receiver goes into this direction. - -This proposal currently doesn’t propose a change to as_awaitable ([exec.as.awaitable]). The primary reason is that there are likely many different shapes of completions each with a different desirable transformation. If these are all absorbed into as_awaitable it is likely fairly hard to reason what exact result is returned. Also, there are likely different options of how a result could be transformed: into_optional is just one example. It could be preferable to turn the two results into an std::expected instead. However, there should probably be some transformation algorithms like into_optional, into_expected, etc. similar to into_variant. - -4.5 Scheduler Affinity -Coroutines look very similar to synchronous code with a few co-keywords sprinkled over the code. When reading such code the expectation is typically that all code executes on the same context despite some co_await expressions using senders which may explicitly change the scheduler. There are various issues when using co_await naïvely: - -Users may expect that work continues on the same context where it was started. 
If the coroutine simply resumes when the co_awaited senders calls a completion function code may execute some lengthy operation on a context which is expected to keep a UI responsive or which is meant to deal with I/O. -Conversely, running a loop co_awaiting some work may be seen as unproblematic but may actually easily cause a stack overflow if co_awaited work immediately completes (also see below). -When co_awaiting some work completes on a different context and later a blocking call is made from the coroutine which also ends up co_awaiting some work from the same resource there can be a dead lock. -Thus, the execution should normally be scheduled on the original scheduler: doing so can avoid the problems mentioned above (assuming a scheduler is used which doesn’t immediately complete without actually scheduling anything). This transfer of the execution with a coroutine is referred to as scheduler affinity. Note: a scheduler may execute on multiple threads, e.g., for a pool scheduler: execution would get to any of these threads, i.e., thread local storage is not guaranteed to access the same data even with scheduler affinity. Also, scheduling work has some cost even if this cost can often be fairly small. - -The basic idea for scheduler affinity consists of a few parts: - -A scheduler is determined when starting an operation state which resulted from connecting a coroutine to a receiver. This scheduler is used to resume execution of the coroutine. The scheduler is determined based on the receiver rcvr’s environment. - -auto scheduler = get_scheduler(get_env(rcvr)); -The type of scheduler is unknown when the coroutine is created. Thus, the coroutine implementation needs to operate in terms of a scheduler with a known type which can be constructed from scheduler. The used scheduler type is determined based on the context parameter C of the coroutine type task using typename C::scheduler_type and defaults to task_scheduler if this type isn’t defined. 
task_scheduler uses type-erasure to deal with arbitrary schedulers (and small object optimisations to avoid allocations). The used scheduler type can be parameterised to allow use of task contexts where the scheduler type is known, e.g., to avoid the costs of type erasure. - -Originally task_scheduler was called any_scheduler but there was feedback from SG1 suggesting that a general any_scheduler may need to cover various additional properties. To avoid dealing with generalizing the facility a different name is used. The name remains specified as it is still a useful component, at least until an any_scheduler is defined by the standard library. If necessary, the type erased scheduler type used by task can be unspecified. - -When an operation which is co_awaited completes the execution is transferred to the held scheduler using continues_on. Injecting this operation into the graph can be done in the promise type’s await_transform: - -template -auto await_transform(Sender&& sndr) noexcept { - return ex::as_awaitable_sender( - ex::continues_on(std::forward(sndr), - this->scheduler); - ); -} -There are a few immediate issues with the basic idea: - -What should happen if there is no scheduler, i.e., get_scheduler(get_env(rcvr)) doesn’t exist? -What should happen if the obtained scheduler is incompatible with the coroutine’s scheduler? -Scheduling isn’t free and despite the potential problems it should be possible to use task without scheduler affinity. -When operations are known to complete inline the scheduler isn’t actually changed and the scheduling operation should be avoided. -It should be possible to explicitly change the scheduler used by a coroutine from within this coroutine. -All of these issues can be addressed although there are different choices in some of these cases. - -In many cases the receiver can provide access to a scheduler via the environment query. An example where no scheduler is available is when starting a task on a counting_scope. 
The scope doesn’t know about any schedulers and, thus, the receiver used by counting_scope when connecting to a sender doesn’t support the get_scheduler query, i.e., this example doesn’t work: - -ex::spawn([]->ex::task { co_await ex::just(); }(), token); -Using spawn() with coroutines doing the actual work is expected to be quite common, i.e., it isn’t just a theoretical possibility that task is used together with counting_scope. The approach used by unifex is to fail compilation when trying to connect a Task to a receiver without a scheduler. The approach taken by stdexec is to keep executing inline in that case. Based on the experience that silently changing contexts within a coroutine frequently causes bugs it seems failing to compile is preferable. - -Failing to construct the scheduler used by a coroutine with the scheduler obtained from the receiver is likely an error and should be addressed by the user appropriately. Failing to compile is seems to be a reasonable approach in that case, too. - -It should be possible to avoid scheduler affinity explicitly to avoid the cost of scheduling. Users should be very careful when pursuing this direction but it can be a valid option. One way to achieve that is to create an “inline scheduler” which immediately completes when it is start()ed and using this type for the coroutine. Explicitly providing a type inline_scheduler implementing this logic could allow creating suitable warnings. It would also allow detecting that type in await_transform and avoiding the use of continues_on entirely. - -When operations actually don’t change the scheduler there shouldn’t be a need to schedule them again. In these cases it would be great if the continues_on could be avoided. At the moment there is no way to tell whether a sender will complete inline. Using a sender query which determines whether a sender always completes inline could avoid the rescheduling. 
Something like that is implemented for unifex: senders define a property blocking which can have the value blocking_kind::always_inline. The proposal A sender query for completion behaviour proposes a get_completion_behaviour(sndr, env) customisation point to address this need. The result can indicate that the sndr returns synchronously (using completion_behaviour::synchronous or completion_behaviour::inline_completion). If sndr returns synchronously there isn’t a need to reschedule it. - -In some situations it is desirable to explicitly switch to a different scheduler from within the coroutine and from then on carry on using this scheduler. unifex detects the use of co_await schedule(scheduler); for this purpose. That is, however, somewhat subtle. It may be reasonable to use a dedicated awaiter for this purpose and use, e.g. - -auto previous = co_await co_continue_on(new_scheduler); -Using this statement replaces the coroutine’s scheduler with the new_scheduler. When the co_await completes it is on new_scheduler and further co_await operations complete on new_scheduler. The result of co_awaiting co_continue_on is the previously used scheduler to allow transfer back to this scheduler. In stdexec the corresponding operation is called reschedule_coroutine. - -Another advantage of scheduling the operations on a scheduler instead of immediately continuing on the context where the operation completed is that it helps with stack overflows: when scheduling on a non-inline scheduler the call stack is unwound. Without that it may be necessary to inject scheduling just for the purpose of avoiding stack overflow when too many operations complete inline. - -4.6 Allocator Support -When using coroutines at least the coroutine frame may end up being allocated on the heap: the HALO optimisations aren’t always possible, e.g., when a coroutine becomes a child of another sender. 
To control how this allocation is done and to support environments where allocations aren’t possible task should have allocator support. The idea is to pick up on a pair of arguments of type std::allocator_arg_t and an allocator type being passed and use the corresponding allocator if present. For example: - -struct allocator_aware_context { - using allocator_type = std::pmr::polymorphic_allocator; -}; - -template -ex::task fun(int value, A&&...) { - co_return value; -} - -int main() { - // Use the coroutine without passing an allocator: - ex::sync_wait(fun(17)); - - // Use the coroutine with passing an allocator: - using allocator_type = std::pmr::polymorphic_allocator; - ex::sync_wait(fun(17, std::allocator_arg, allocator_type())); -} -The arguments passed when creating the coroutine are made available to an operator new of the promise type, i.e., this operator can extract the allocator, if any, from the list of parameters and use that for the purpose of allocation. The matching operator delete gets passed only the pointer to release and the originally requested size. To have access to the correct allocator in operator delete the allocator either needs to be stateless or a copy needs to be accessible via the pointer passed to operator delete, e.g., stored at the offset size. - -To avoid any cost introduced by type erasing an allocator type as part of the task definition the expected allocator type is obtained from the context argument C of task: - -using allocator_type = ex::allocator_of_t; -This using alias uses typename C::allocator_type if present or defaults to std::allocator otherwise. This allocator_type has to be for the type std::byte (if necessary it is possible to relax that constraint). - -The allocator used for the coroutine frame should also be used for any other allocators needed for the coroutine itself, e.g., when type erasing something needed for its operation (although in most cases a small object optimisation would be preferable and sufficient). 
Also, the allocator should be made available to child operations via the respective receiver’s environment using the get_allocator query. The arguments passed to the coroutine are also available to the constructor of the promise type (if there is a matching on) and the allocator can be obtained from there: - -struct allocator_aware_context { - using allocator_type = pmr::polymorphic_allocator; -}; -fixed_resource<2048> resource; - -ex::sync_wait([](auto&&, auto* resource) - -> ex::task { - auto alloc = co_await ex::read_env(ex::get_allocator); - use(alloc); -}(allocator_arg, &resource)); -4.7 Environment Support -When co_awaiting child operations these may want to access an environment. Ideally, the coroutine would expose the environment from the receiver it gets connected to. Doing so isn’t directly possible because the coroutine types doesn’t know about the receiver type which in turn determines the environment type. Also, the queries don’t know the type they are going to return. Thus, some extra mechanisms are needed to provide an environment. - -A basic environment can be provided by some entities already known to the coroutine, though: - -The get_scheduler query should provide the scheduler maintained for scheduler affinity whose type is determined based on the coroutine’s context using ex::scheduler_of_t. -The get_allocator query should provide the coroutine’s allocator whose type is determined based on the coroutine’s context using ex::allocator_of_t. The allocator gets initialized when constructing the promise type. -The get_stop_token query should provide a stop token from a stop source which is linked to the stop token obtained from the receiver’s environment. The type of the stop source is determined from the coroutine’s context using ex::stop_source_of_t and defaults to ex::inplace_stop_source. 
Linking the stop source can be delayed until the first stop token is requested or omitted entirely if stop_possible() returns false or if the stop token type of the coroutine’s receiver matches that of ex::stop_source_of_t. -For any other environment query the context C of task can be used. The coroutine can maintain an instance of type C. In many cases queries from the environment of the coroutine’s receiver need to be forwarded. Let env be get_env(receiver) and Env be the type of env. C gets optionally constructed with access to the environment: - -If C::env_type is a valid type the coroutine state will contain an object own_env of this type which is constructed with env. The object own_env will live at least as long as the C object maintained and C is constructed with a reference to own_env, allowing C to reference type-erased representations for query results it needs to forward. -Otherwise, if C(env) is valid the C object is constructed with the result of get_env(receiver). Constructing the context with the receiver’s environment provides the opportunity to store whatever data is needed from the environment to later respond to queries as well. -Otherwise, C is default constructed. This option typically applies if C doesn’t need to provide any environment queries. -Any query which isn’t provided by the coroutine but is available from the context C is forwarded. Any other query shouldn’t be part of the overload set. - -For example: - -struct context { - int value{}; - int query(get_value_t const&) const noexcept { return this->value; } - context(auto const& env): value(get_value(env)) {} -}; - -int main() { - ex::sync_wait( - ex::write_env( - []->demo::task { - auto sched(co_await ex::read_env(get_scheduler)); - auto value(co_await ex::read_env(get_value)); - std::cout << "value=" << value << "\n"; - // ... 
- -}(), - ex::make_env(get_value, 42) - ) - ); -} -4.8 Support For Requesting Cancellation/Stopped -When a coroutine task executes the actual work it may listen to a stop token to recognise that it got canceled. Once it recognises that its work should be stopped it should also complete with set_stopped(rcvr). There is no special syntax needed as that is the result of using just_stopped(): - -co_await ex::just_stopped(); -The sender just_stopped() completes with set_stopped() causing the coroutine to be canceled. Any other sender completing with set_stopped() can also be used. - -4.9 Error Reporting -The sender/receiver approach to error reporting is for operations to complete with a call to set_error(rcvr, err) for some receiver object rcvr and an error value err. The details of the completions are used by algorithms to decide how to proceed. For example, if any of the senders of when_all(sndr...) fails with a set_error_t completion the other senders are stopped and the overall operation fails itself forwarding the first error. Thus, it should be possible for coroutines to complete with a set_error_t completion. Using a set_value_t completion using an error value isn’t quite the same as these are not detected as errors by algorithms. - -The error reporting used for unifex and stdexec is to turn an exception escaping from the coroutine into a set_error_t(std::exception_ptr) completion: when unhandled_exception() is called on the promise type the coroutine is suspended and the function can just call set_error(r, std::current_exception()). There are a few limitations with this approach: - -The only supported error completion is set_error_t(std::exception_ptr). While the thrown exception can represent any error type and set_error_t completions from co_awaited operations resulting in the corresponding error being thrown it is better if the other error types can be reported, too. -To report an error an exception needs to be thrown. 
In some environments it is preferred to not throw exceptions or exceptions may even be entirely banned or disabled which means that there isn’t a way to report errors from coroutines unless a different mechanism is provided. -To extract the actual error information from std::exception_ptr the exception has to be rethrown. -The completion signatures for task necessarily contain set_error_t(std::exception_ptr) which is problematic when exceptions are unavailable: std::exception_ptr may also be unavailable. Also, without exceptions it is impossible to decode the error. It can be desirable to have coroutines which don’t declare such a completion signature. -Before going into details on how errors can be reported it is necessary to provide a way for task to control the error completion signatures. Similar to the return type the error types cannot be deduced from the coroutine body. Instead, they can be declared using the context type C: - -If present, typename C::error_signatures is used to declare the error types. This type needs to be a specialisation of completion_signatures listing the valid set_error_t completions. -If this nested type is not present, completion_signatures is used as a default. -The name can be adjusted and it would be possible to use a different type list template and list the error types. The basic idea would remain the same, i.e., the possible error types are declared via the context type. - -Reporting an error by having an exception escape the coroutine is still possible but it doesn’t necessarily result in a set_error_t: If an exception escapes the coroutine and set_error_t(std::exception_ptr) isn’t one of the supported set_error_t completions, std::terminate() is called. If an error is explicitly reported somehow, e.g., using one of the approaches described below, and the error type isn’t supported by the context’s error_signatures, the program is ill-formed. 
- -The discussion below assumes the use of the class template with_error to indicate that the coroutine completed with an error. It can be as simple as - -template struct with_error{ E error; }; -The name can be different although it shouldn’t collide with already used names (like error_code or upon_error). Also, in some cases there isn’t really a need to wrap the error into a recognisable class template. Using a marker type probably helps with readability and avoiding ambiguities in other cases. - -Besides exceptions there are three possible ways in which a coroutine can be exited: - -The coroutine is exited when using co_return, optionally with an argument. Flowing off the end of a coroutine is equivalent to explicitly using co_return; instead of flowing off. It would be possible to turn the use of - -co_return with_error{err}; -into a set_error(std::move(rcvr), err) completion. - -One restriction with this approach is that for a task the body can’t contain co_return with_error{e};: the void result requires that the promise type contains a function return_void() and if that is present it isn’t possible to also have a return_value(T). - -When a coroutine uses co_await a; the coroutine is in a suspended state when await_suspend(...) of some awaiter is entered. While the coroutine is suspended it can be safely destroyed. It is possible to complete the coroutine in that state and have the coroutine be cleaned up. This approach is used when the awaited operation completes with set_stopped(). It is possible to call set_error(std::move(rcvr), err) for some receiver rcvr and error err obtained via the awaitable a. Thus, using - -co_await with_error{err}; -could complete with set_error(std::move(rcvr), err). - -Using the same notation for awaiting outstanding operations and returning results from a coroutine is, however, somewhat surprising. The name of the awaiter may need to become more explicit like exit_coroutine_with_error if this approach should be supported. 
- -When a coroutine uses co_yield v; the promise member yield_value(T) is called which can return an awaiter a. When a’s await_suspend() is called, the coroutine is suspended and the operation can complete accordingly. Thus, using - -co_yield with_error{err}; -could complete with set_error(std::move(rcvr), err). Using co_yield for the purpose of returning from a coroutine with a specific result seems more expected than using co_await. - -There are technically viable options for returning an error from a coroutine without requiring exceptions. Whether any of them is considered suitable from a readability point of view is a separate question. - -One concern which was raised with just not resuming the coroutine is that the time of destruction of variables used by the coroutine is different. The promise object can be destroyed before completing which might address the concern. - -Using co_await or co_yield to propagate error results out of the coroutine has a possibly interesting variation: in both of these case the error result may be conditionally produced, i.e., it is possible to complete with an error sometimes and to produce a value at other times. That could allow a pattern (using co_yield for the potential error return): - -auto value = co_yield when_error(co_await into_expected(sender)); -The subexpression into_expected(sender) could turn the set_value_t and set_error_t into a suitable std::expected> always reported using a set_value_t completion (so the co_await doesn’t throw). The corresponding std::expected becomes the result of the co_await. Using co_yield with when_error(exp) where exp is an expected can then either produce exp.value() as the result of the co_yield expression or it can result in the coroutine completing with the error from exp.error(). Using this approach produces a fairly compact approach to propagating the error retaining the type and without using exceptions. 
- -4.10 Avoiding Stack Overflow -It is easy to use a coroutine to accidentally create a stack overflow because loops don’t really execute like loops. For example, a coroutine like this can easily result in a stack overflow: - -ex::sync_wait(ex::write_env( - []() -> ex::task { - for (int i{}; i < 1000000; ++i) - co_await ex::just(i); - }(), - ex::make_env(ex::get_scheduler, ex::inline_scheduler{}) -)); -The reason this innocent looking code creates a stack overflow is that the use of co_await results in some function calls to suspend the coroutine and then further function calls to resume the coroutine (for a proper explanation see, e.g., Lewis Baker’s Understanding Symmetric Transfer). As a result, the stack grows with each iteration of the loop until it eventually overflows. - -With senders it is also not possible to use symmetric transfer to combat the problem: to achieve the full generality and composing senders, there are still multiple function calls used, e.g., when producing the completion signal. Using get_completion_behaviour from the proposal A sender query for completion behaviour could allow detecting senders which complete synchronously. In these cases the stack overflow could be avoided relying on symmetric transfer. - -When using scheduler affinity the transfer of control via a scheduler which doesn’t complete immediately does avoid the risk of stack overflow: even when the co_awaited work immediately completes as part of the await_suspend call of the created awaiter the coroutine isn’t immediately resumed. Instead, the work is scheduled and the coroutine is suspended. The thread unwinds its stack until it reaches its own scheduling and picks up the next entity to execute. - -When using sync_wait(sndr) the run_loop’s scheduler is used and it may very well just resume the just suspended coroutine: when there is scheduling happening as part of scheduler affinity it doesn’t mean that work gets scheduled on a different thread! 
- -The problem with stack overflows does remain when the work resumes immediately despite using scheduler affinity. That may be the case when using an inline scheduler, i.e., a scheduler with an operation state whose start() immediately completes: the scheduled work gets executed as soon as set_value(std::move(rcvr)) is called. - -Another potential for stack overflows is when optimising the behaviour for work which is known to not move to another scheduler: in that case there isn’t really any need to use continue_on to get back to the scheduler where the operation was started! The execution remained on that scheduler all along. However, not rescheduling the work means that the stack isn’t unwound. - -Since task uses scheduler affinity by default, stack overflow shouldn’t be a problem and there is no separate provision required to combat stack overflow. If the implementation chooses to avoid rescheduling work it will need to make sure that doing so doesn’t cause any problems, e.g., by rescheduling the work sometimes. When using an inline scheduler the user will need to be very careful to not overflow the stack or cause any of the various other problems with executing immediately. - -4.11 Asynchronous Clean-Up -Asynchronous clean-up of objects is an important facility. Both unifex and stdexec provide some facilities for asynchronous clean-up in their respective coroutine task. Based on the experience the recommendation is to do something different! - -The recommended direction is to support asynchronous resources independent of a coroutine task. For example the async-object proposal is in this direction. There is similar work ongoing in the context of Folly. Thus, there is currently no plan to support asynchronous clean-up as part of the task implementation. Instead, it can be composed based on other facilities. - -5 Caveats -The use of coroutines introduces some issues which are entirely independent of how specific coroutines are defined. 
Some of these were brought up on prior discussions but they aren’t anything which can be solved as part of any particular coroutine implementation. In particular: - -As co_awaiting the result of an operation (or co_yielding a value) may suspend a coroutine, there is a potential to introduce problems when resources which are meant to be held temporarily are held when suspending. For example, holding a lock to a mutex while suspending a coroutine can result in a different thread trying to release the lock when the coroutine is resumed (scheduler affinity will move the resumed coroutine to the same scheduler but not to the same thread). -Destroying a coroutine is only safe when it is suspended. For the task implementation that means that it shall only call a completion handler once the coroutine is suspended. That part is under the control of the coroutine implementation. However, there is no way to guard against users explicitly destroying a coroutine from within its implementation or from another thread while it is not suspended: that’s akin to destroying an object while it being used. -Debugging asynchronous code doesn’t work with the normal approaches: there is generally no suitable stack as work gets resumed from some run loop which doesn’t tell what set up the original work. To improve on this situation, async stack traces linking different pieces of outstanding work together can help. At CppCon 2024 Ian Petersen and Jessica Wong presented how that may work (watch the video). Implementations should consider adding corresponding support and enhance tooling, e.g., debuggers, to pick up on async stack traces. However, async stack support itself isn’t really something which one coroutine implementation can enable. -While these issues are important this proposal isn’t the right place to discuss them. Discussion of these issues should be delegated to suitable proposals wanting to improve this situation in some form. 
- -6 Questions -This section lists questions based on the design discussion above. Each one has a recommendation and a vote is only needed if there are opinions deviating from the recommendation. - -Result type: expand as_awaitable(sndr) to support more than one set_value_t(T...) completion? Recommendation: no. -Result type: add transformation algorithms like into_optional, into_expected? Recommendation: no, different proposals. -Scheduler affinity: should task support scheduler affinity? Recommendation: yes. -Scheduler affinity: require a get_scheduler() query on the receiver’s environments? Recommendation: yes. -Scheduler affinity: add a definition for inline_scheduler (using whatever name) to support disabling scheduler affinity? Recommendation: yes. -Allocator support: should task support allocators (default std::allocator)? Recommendation: yes. -Error reporting: should it be possible to return an error without throwing an exception? Recommendation: yes. -Error reporting: how should errors be reported? Recommendation: using co_yield with_error(e). -Error reporting: should co_yield when_error(expected) be supported? Recommendation: yes (although weakly). -Clean-up: should asynchronous clean-up be supported? Recommendation: no. -7 Implementation -An implementation of task as proposed in this document is available from beman::task. This implementation hasn’t received much use, yet, as it is fairly new. It is set up to be buildable and provides some examples as a starting point for experimentation. - -Coroutine tasks very similar although not identical to the one proposed are used in multiple projects. In particular, there are three implementations in wide use: - -Folly::Task -unifex::Task -stdexec::task -The first one (Folly::Task) isn’t based on sender/receiver. Usage experience from all three has influenced the design of task. 
- -8 Acknowledgements -We would like to thank Ian Petersen, Alexey Spiridonov, and Lee Howes for comments on drafts of this proposal and general guidance. - -9 Proposed Wording -In [version.syn], add a row - -#define __cpp_lib_task YYYYMML // also in -In [except.terminate] paragraph 1 add this bullet at the end of Note 1: - -[ Note: These situations are - -… - -when unhandled_stopped is called on a with_awaitable_senders object ([exec.with.awaitable.senders]) whose continuation is not a handle to a coroutine whose promise type has an unhandled_stopped member function., or - -when an exception is thrown from a coroutine std::execution::task which doesn’t support a std::execution::set_error_t(std::exception_ptr) completion. -— end note ] -In 33.4 [execution.syn] add declarations for the new classes: - -namespace std::execution { - ... - // [exec.with.awaitable.senders] - template - struct with_awaitable_senders; - // [exec.affine.on] - struct affine_on_t { unspecified }; - inline constexpr affine_on_t affine_on; - - // [exec.inline.scheduler] - class inline_scheduler; - - // [exec.task.scheduler] - class task_scheduler; - - // [exec.task] - template - class task; -} -Add new subsections for the different classes at the end of 33 [exec]: - -[ Drafting note: Everything below is text meant to go to the end of the 33 [exec] section without any color highlight of what is being added. ] - -9.1 execution::affine_on [exec.affine.on] -1affine_on adapts a sender into one that completes on the specified scheduler. If the algorithm determines that the adapted sender already completes on the correct scheduler it can avoid any scheduling operation. - -2The name affine_on denotes a pipeable sender adaptor object. For subexpressions sch and sndr, if decltype((sch)) does not satisfy scheduler, or decltype((sndr)) does not satisfy sender, affine_on(sndr, sch) is ill-formed. 
- -3Otherwise, the expression affine_on(sndr, sch) is expression-equivalent to: - - transform_sender(get-domain-early(sndr), make-sender(affine_on, sch, sndr)) -except that sndr is evalutated only once. - -4The exposition-only class template impls-for is specialized for affine_on_t as follows: - - namespace std::execution { - template <> - struct impls-for: default-impls { - static constexpr auto get-attrs = - [](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); - }; - }; - } -5Let out_sndr be a subexpression denoting a sender returned from affine_on(sndr, sch) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be a subexpression denoting a receiver that has an environment of type Env such that sender_in is true. Let op be an lvalue referring to the operation state that results from connecting out_sndr to out_rcvr. Calling start(op) will start sndr on the current execution agent and execute completion operations on out_rcvr on an execution agent of the execution resource associated with sch. If the current execution resource is the same as the execution resource associated with sch, the completion operation on out_rcvr may be called before start(op) completes. If scheduling onto sch fails, an error completion on out_rcvr shall be executed on an unspecified execution agent. - -9.2 execution::inline_scheduler [exec.inline.scheduler] -namespace std::execution { - class inline_scheduler { - class inline-sender; // exposition only - template - class inline-state; // exposition only - - public: - using scheduler_concept = scheduler_t; - - constexpr inline-sender schedule() noexcept { return {}; } - constexpr bool operator== (const inline_scheduler&) const noexcept = default; - }; -} -1inline_scheduler is a class that models scheduler [exec.scheduler]. All objects of type inline_scheduler are equal. 
- -2inline-sender is an exposition-only type that satisfies sender. The type -completion_signatures_of_t is completion_signatures. - -3Let sndr be an expression of type inline-sender, let rcvr be an expression such that receiver_of is true where CS is completion_signatures, then: - -(3.1)the expression connect(sndr, rcvr) has type inline-state> and is potentially-throwing if and only if ((void)sndr, auto(rcvr)) is potentially-throwing, and -(3.2)the expression get_completion_scheduler(get_env(sndr)) has type inline_scheduler and is potentially-throwing if and only if get_env(sndr) is potentially-throwing. -4Let o be a non-const lvalue of type inline-state, and let REC(o) be a non-const lvalue reference to an object of type Rcvr that was initialized with the expression rcvr passed to an invocation of connect that returned o, then: - -(4.1)the object to which REC(o) refers remains valid for the lifetime of the object to which o refers, and -(4.2)the expression start(o) is equivalent to set_value(std::move(REC(o))). -9.3 execution::task_scheduler [exec.task.scheduler] -namespace std::execution { - class task_scheduler { - class sender; // exposition only - template - class state; // exposition only - - public: - using scheduler_concept = scheduler_t; - - template > - requires (!same_as>) - && scheduler - explicit task_scheduler(Sch&& sch, Allocator alloc = {}); - - sender schedule(); - - friend bool operator== (const task_scheduler& lhs, const task_scheduler& rhs) - noexcept; - template - requires (!same_as) - && scheduler - friend bool operator== (const task_scheduler& lhs, const Sch& rhs) noexcept; - - private: - shared_ptr sch_; // exposition only - }; -} -1task_scheduler is a class that models scheduler [exec.scheduler]. Given on object s of type task_scheduler, let SCHED(s) be the object owned by s.sch_. 
- -template > - requires(!same_as>) && scheduler -explicit task_scheduler(Sch&& sch, Allocator alloc = {}); -2Effects: Initialize sch_ with allocate_shared>(alloc, std::forward(sch)). - -3Recommended practice: Implementations should avoid the use of dynamically allocated memory for small scheduler objects. - -4Remarks: Any allocations performed by construction of sender or state objects resulting from calls on *this are performed using a copy of alloc. - -sender schedule(); -5Effects: Returns an object of type sender containing a sender initialized with schedule(SCHED(*this)). - -bool operator== (const task_scheduler& lhs, const task_scheduler& rhs) noexcept; -6Effects: Equivalent to: return lhs == SCHED(rhs); - -template - requires (!same_as) - && scheduler -bool operator== (const task_scheduler& lhs, const Sch& rhs) noexcept; -7Returns: false if type of SCHED(lhs) is not Sch, otherwise SCHED(lhs) == rhs; - -class task_scheduler::sender { // exposition only -public: - using sender_concept = sender_t; - - template - state connect(R&& rcvr); -}; -8sender is an exposition-only class that models sender [exec.sender] and for which completion_signatures_of_t denotes: - -completion_signatures< - set_value_t(), - set_error_t(error_code), - set_error_t(exception_ptr), - set_stopped_t()> -9Let sch be an object of type task_scheduler and let sndr be an object of type sender obtained from schedule(sch). Then get_completion_scheduler(get_env(sndr)) == sch is true. The object SENDER(sndr) is the sender object contained by sndr or an object move constructed from it. - -template -state connect(Rcvr&& rcvr); -10Effects: Let r be an object of a type that models receiver and whose completion handlers result in invoking the corresponding completion handlers of rcvr or copy thereof. Returns an object of type state containing an operation state object initialized with connect(SENDER(*this), std::move(r)). 
- -template -class task_scheduler::state { // exposition only -public: - using operation_state_concept = operation_state_t; - - void start() & noexcept; -}; -11state is an exposition-only class template whose specializations model operation_state 33.8 [exec.opstate]. - -void start() & noexcept; -12Effects: Equivalent to start(st) where st is the operation state object contained by *this. - -9.4 execution::task [exec.task] -9.4.1 task Overview [task.overview] -1The task class template represents a sender that can be used as the return type of coroutines. The first template parameter T defines the type value completion datum (33.3 [exec.async.ops]) if T is not void. Otherwise, there are no value completion datums. Inside coroutines returning task the operand of co_return (if any) becomes the argument of set_value. The second template parameter Environment is used to customize the behavior of task. - -9.4.2 Class template task [task.class] -namespace std::execution { - template - class task { - // [task.state] - template - class state; // exposition only - - public: - using sender_concept = sender_t; - using completion_signatures = see below; - using allocator_type = see below; - using scheduler_type = see below; - using stop_source_type = see below; - using stop_token_type = decltype(declval().get_token()); - using error_types = see below; - - // [task.promise] - class promise_type; - - task(task&&) noexcept; - ~task(); - - template - state connect(R&& recv); - - private: - coroutine_handle handle; // exposition only - }; -} -1task models sender 33.9 [exec.snd] if T is void, a reference type, or an cv-unqualified non-array object type and E is class type. Otherwise a program that instantiates the definition of task is ill-formed. - -2The nested types of task template specializations are determined based on the Environment parameter: - -(2.1)allocator_type is Environment::allocator_type if that qualified-id is valid and denotes a type, allocator otherwise. 
-(2.2)scheduler_type is Environment::scheduler_type if that qualified-id is valid and denotes a type, task_scheduler otherwise. -(2.3)stop_source_type is Environment::stop_source_type if that qualified-id is valid and denotes a type, inplace_stop_source otherwise. -(2.4)error_types is Environment::error_types if that qualified-id is valid and denotes a type, completion_signatures otherwise. -3A program is ill-formed if error_types is not a specialization of completion_signatures or ErrorSigs contains an element which is not of the form set_error_t(E) for some type E. - -4The type alias completion_signatures is a specialization of execution::completion_signatures with the template arguments (in unspecified order): - -(4.1)set_value_t() if T is void, and set_value_t(T) otherwise; -(4.2)template arguments of the specialization of execution::completion_signatures denoted by error_types; and -(4.3)set_stopped_t(). -5allocator_type shall meet the Cpp17Allocator requirements. - -9.4.3 Task Members [task.members] -task(task&& other) noexcept; -1Effects: Initializes handle with exchange(other.handle, {}). - -~task(); -2Effects: Equivalent to: - - if (handle) - handle.destroy(); -template -state connect(R&& recv); -3Preconditions: bool(handle) is true. 
- -4Effects: Equivalent to: return state(exchange(handle, {}), std::forward(recv)); - -9.4.4 Class template task::state [task.state] -namespace std::execution { - template - template - class task::state { // exposition only - public: - using operation_state_concept = operation_state_t; - - template - state(coroutine_handle h, Rcvr&& rr); - ~state(); - void start() & noexcept; - -private: - using own-env-t = see below; // exposition only - coroutine_handle handle; // exposition only - remove_cvref_t rcvr; // exposition only - own-env-t own-env; // exposition only - Environment environment; // exposition only - }; -} -1The type own-env-t is Environment::template env_type()))> if that qualified-id is valid and denotes a type, env<> otherwise. - -template -state(coroutine_handle h, Rcvr&& rr); -2Effects: Initializes - -(2.1)handle with std::move(h); -(2.2)rcvr with std::forward(rr); -(2.3)own-env with own-env-t(get_env(rcvr)) if that expression is valid and own-env-t() otherwise; If neither of these expressions is valid, the program is ill-formed. -(2.4)environment with Environment(own-env) if that expression is valid, otherwise Environment(get_env(rcvr)) if this expression is valid, otherwise Environment(). If neither of these expressions is valid, the program is ill-formed. -~state(); -3Effects: Equivalent to: - - if (handle) - handle.destroy(); -void start() & noexcept; -4Effects: Let prom be the object handle.promise(). Associates STATE(prom), RCVR(prom), and SCHED(prom) with *this as follows: - -(4.1)STATE(prom) is *this. -(4.2)RCVR(prom) is rcvr. -(4.3)SCHED(prom) is the object initialized with scheduler_type(get_scheduler(get_env(rcvr))) if that expression is valid and scheduler_type() otherwise. If neither of these expressions is valid, the program is ill-formed. -Let st be get_stop_token(get_env(rcvr)). 
Initializes prom.token and prom.source such that - -(4.4)prom.token.stop_requested() returns st.stop_requested(); -(4.5)prom.token.stop_possible() returns st.stop_possible(); and -(4.6)for types Fn and Init such that both invocable and constructible_from are modeled, stop_token_type::callback_type models stoppable-callback-for. -After that invokes handle.resume(). - -9.4.5 Class task::promise_type [task.promise] -namespace std::execution { - template - struct with_error { - using type = remove_cvref_t; - type error; - }; - template - with_error(E) -> with_error; - - template - struct change_coroutine_scheduler { - using type = remove_cvref_t; - type scheduler; - }; - template - change_coroutine_scheduler(Sch) -> change_coroutine_scheduler; - - template - class task::promise_type { - public: - template - promise_type(const Args&... args); - - task get_return_object() noexcept; - - auto initial_suspend() noexcept; - auto final_suspend() noexcept; - - void uncaught_exception(); - coroutine_handle<> unhandled_stopped(); - - void return_void(); // present only if is_void_v is true; - template - void return_value(V&& value); // present only if is_void_v is false; - - template - unspecified yield_value(with_error error); - - template - auto await_transform(A&& a); - template - auto await_transform(change_coroutine_scheduler sch); - - unspecified get_env() const noexcept; - - template - void* operator new(size_t size, Args&&... args); - - void operator delete(void* pointer, size_t size) noexcept; - - private: - using error-variant = see below; // exposition only - - allocator_type alloc; // exposition only - stop_source_type source; // exposition only - stop_token_type token; // exposition only - optional result; // exposition only; present only if is_void_v is false; - error-variant errors; // exposition only - }; -} -1Let prom be an object of promise_type and let tsk be the task object created by prom.get_return_object(). 
The description below refers to objects STATE(prom), RCVR(prom), and SCHED(prom) associated with tsk during evaluation of task::state::start for some receiver Rcvr [task.state].
- -9Returns: An awaitable object of unspecified type ([expr.await]) whose member functions arrange for the calling coroutine to be suspended and then completes the asynchronous operation associated with STATE(*this) by invoking set_error(std::move(RCVR(*this)), Cerr(std::move(err.error))). - -template -auto await_transform(Sender&& sndr) noexcept; -10Returns: If same_as is true returns as_awaitable(std::forward(sndr), *this); otherwise returns as_awaitable(affine_on(std::forward(sndr), SCHED(*this)), *this). - -template -auto await_transform(change_coroutine_scheduler sch) noexcept; -11Effects: Equivalent to: returns await_transform(just(exchange(SCHED(*this), scheduler_type(sch.scheduler))), *this); - -void uncaught_exception(); -12Effects: If the signature set_error_t(exception_ptr) is not an element of error_types, calls terminate() (14.6.2 [except.terminate]). Otherwise, stores current_exception() into errors. - -coroutine_handle<> unhandled_stopped(); -13Effects: Completes the asynchronous operation associated with STATE(*this) by invoking set_stopped(std::move(RCVR(*this))). - -14Returns: noop_coroutine(). - -unspecified get_env() const noexcept; -15Returns: An object env such that queries are forwarded as follows: - -(15.1)env.query(get_scheduler) returns scheduler_type(SCHED(*this)). -(15.2)env.query(get_allocator) returns alloc. -(15.3)env.query(get_stop_token) returns token. -(15.4)For any other query q and arguments a... a call to env.query(q, a...) returns STATE(*this).environment.query(q, a...) if this expression is well-formed and forwarding_query(q) is well-formed and is true. Otherwise env.query(q, a...) is ill-formed. -template -void* operator new(size_t size, const Args&... args); -16If there is no parameter with type allocator_arg_t then let alloc be Allocator(). Let arg_next be the parameter following the first allocator_arg_t parameter (if any) and let alloc be Allocator(arg_next). 
Then PAlloc is allocator_traits::template rebind_alloc where U is an unspecified type whose size and alignment are both __STDCPP_DEFAULT_NEW_ALIGNMENT__. - -17Mandates: - -(17.1)The first parameter of type allocator_arg_t (if any) is not the last parameter. -(17.2)Allocator(arg_next) is a valid expression if there is a parameter of type allocator_arg_t. -(17.3)allocator_traits::pointer is a pointer type. -18Effects: Initializes an allocator palloc of type PAlloc with alloc. Uses palloc to allocate storage for the smallest array of U sufficient to provide storage for a coroutine state of size size, and unspecified additional state necessary to ensure that operator delete can later deallocate this memory block with an allocator equal to palloc. - -19Returns: A pointer to the allocated storage. - -void operator delete(void* pointer, size_t size) noexcept; -20Preconditions: pointer was returned from an invocation of the above overload of operator new with a size argument equal to size. - -21Effects: Deallocates the storage pointed to by pointer using an allocator equal to that used to allocate it. 
\ No newline at end of file diff --git a/context/research/p3826.md b/context/research/p3826.md deleted file mode 100644 index a933999..0000000 --- a/context/research/p3826.md +++ /dev/null @@ -1,2550 +0,0 @@ -Fix or Remove Sender Algorithm Customization -Document #: P3826R2 [Latest] [Status] -Date: 2025-11-07 -Project: Programming Language C++ -Audience: SG1 Concurrency and Parallelism Working Group -LEWG Library Evolution Working Group -LWG Library Working Group -Reply-to: Eric Niebler - -Contents -1 Background -2 The problem with P3718 -2.1 An illustrative example -3 Solutions considered -3.1 Remove all of the C++26 std::execution additions -3.2 Remove all of the customizable sender algorithms -3.3 Remove sender algorithm customization -3.4 Ship everything as-is and fix algorithm customization in a DR -3.5 Fix algorithm customization now -4 Fixing algorithm customization -4.1 Determining the starting and completing domains -4.1.1 get_completion_domain -4.2 Algorithm dispatching in connect -4.2.1 Solving the double-dispatch problem -4.2.2 transform_sender -4.3 Revisiting the problematic example -4.4 Customizing continues_on and schedule_from -4.4.1 Background -4.4.2 A simplification -4.5 inline_scheduler improvements -4.6 Indeterminate domains -4.7 The procedure for the fix -4.8 Proposed wording, option 1: Fix algorithm customization -5 Removing algorithm customization -5.1 The parallel scheduler -5.2 The task scheduler -5.3 The bulk algorithms -5.3.1 Option 1: Remove bulk, bulk_chunked, and bulk_unchunked -5.3.2 Option 2: Magical parallel execution -5.3.3 Option 3: A normative mechanism for the bulk* algorithms only -5.4 Impacts on hardware vendors -5.5 Mitigating factors -5.5.1 Sender introspection -5.5.2 Third party algorithms -5.5.3 Difficulties with proprietary extensions -5.6 The removal process -5.6.1 Approach -5.6.2 Procedure -5.7 Restoring algorithm customization in C++29 -5.7.1 Completion scheduler enhancements -5.7.2 Domains -5.7.3 Customizing connect 
-5.7.4 The parallel and task schedulers and bulk -5.8 Proposed wording, option 2: Remove algorithm customization -6 Appendix A: Listing for updated transform_sender -7 References -1 Background -In the current Working Draft, 33 [exec] has sender algorithms that are customizable. While the sender/receiver concepts and the algorithms themselves have been stable for several years now, the customization mechanism has seen a fair bit of recent churn. [P3718R0] is the latest effort to shore up the mechanism. Unfortunately, there are gaps in its proposed resolution. This paper details those gaps. - -The problem and its solution are easy to describe, but the changes are not trivial. The fix has been implemented once, and another independent implementation is in progress at the time of writing. This paper proposes to fix the issue for C++26. Should the fix be deemed too risky, this paper also offers guidance on how to remove the ability to customize sender algorithms for C++26 in a way that permits us to add it back in C++29. - -2 The problem with P3718 -[P3718R0] identifies real problems with the status quo of sender algorithm customization. It proposes using information from the sender about where it will complete during “early” customization, which happens when a sender algorithm constructs and returns a sender; and it proposes using information from the receiver about where the operation will start during “late” customization, when the sender and the receiver are connected. - -The problem with this separation of responsibilities is: - -Many senders do not know where they will complete until they know where they will be started. -A simple example is the just() sender; it completes inline wherever it is started. The information about where a sender will start is not known during early customization, when the sender is being asked for this information. 
- -And even if we knew where the sender will start, there is no generic interface for asking a sender where it will complete given where it will start. There currently is no such API, which is the whole problem in a nutshell. - -2.1 An illustrative example -This section illustrates the above problem by walking through the algorithm selection process proposed by P3718. Consider the following example: - -namespace ex = std::execution; -auto sndr = ex::starts_on(gpu, ex::just()) | ex::then(fn); -std::this_thread::sync_wait(std::move(sndr)); -… where gpu is a scheduler that runs work (unsurprisingly) on a GPU. - -fn will execute on the GPU, so a GPU implementation of then should be used. By the proposed resolution of P3718, algorithm customization proceeds as follows: - -During early customization, when starts_on(gpu, just()) | then(fn) is executing, the then CPO asks the starts_on(gpu, just()) sender where it will complete as if by: - -auto& [_, fn, child1] = *this; // *this is a then sender - // child1 is a starts_on sender -auto dom1 = ex::get_domain(ex::get_env(child1)); -The starts_on sender will in turn ask the just() sender, as if by: - -auto& [_, sch, child2] = *this; // *this is a starts_on sender - // child2 is a just sender -auto dom2 = ex::get_domain(ex::get_env(child2)); -As discussed, the just() sender doesn’t know where it will complete until it knows where it will be started, but that information is not yet available. As a result, dom2 ends up as default_domain, which is then reported as the domain for the starts_on sender. That’s incorrect. The starts_on sender will complete on the GPU. - -The then CPO uses default_domain to find an implementation of the then algorithm, which will find the default implementation. As a result, the then CPO returns an ordinary then sender. - -When that then sender is connected to sync_wait’s receiver, late customization happens. connect asks sync_wait’s receiver where the then sender will be started. 
It does that with the query get_domain(get_env(rcvr)). sync_wait starts operations on the current thread, so the get_domain query will return default_domain. As with early customization, late customization will also not find a GPU implementation. - -The end result of all of this is that a default (which is effectively a CPU) implementation will be used to evaluate the then algorithm on the GPU. That is a bad state of affairs. - -3 Solutions considered -Here is a list of possible ways to address this problem for C++26, sorted in descending awfulness. - -3.1 Remove all of the C++26 std::execution additions -Although the safest option, I hope most agree that such a drastic step is not warranted by this issue. Pulling the sender abstraction and everything that depends on it would result in the removal of: - -The sender/receiver-related concepts and customization points, without which the ecosystem will have no shared async abstraction, and which will set back the adoption of structured concurrency three years. - -The sender algorithms, which capture common async patterns and make them reusable, - -execution::counting_scope and execution::simple_counting_scope, and related features for incremental adoption of structured concurrency, - -execution::parallel_scheduler and all of its related APIs, and - -execution::task and execution::task_scheduler (C++26 will still not have a standard coroutine task type ). - -This option should only be considered if all the other options are determined to have unacceptable risk. 
- -3.2 Remove all of the customizable sender algorithms -This option would keep all of the above library components with the exception of the customizable sender algorithms: - -then, upon_error, upon_stopped -let_value, let_error, let_stopped -bulk, bulk_chunked, bulk_unchunked -starts_on, continues_on, on -when_all, when_all_with_variant -stopped_as_optional, stopped_as_error -into_variant -sync_wait -affine_on -This would leave users with no easy standard way to start work on a given execution context, or transition to another execution context, or to execute work in parallel, or to wait for work to finish. - -In fact, without the bulk algorithms, we leave no way for the parallel_scheduler to execute work in parallel! - -While still delivering a standard async abstraction with minimal risk, the loss of the algorithms would make it just an abstraction. Like coroutines, adoption of senders as an async lingua franca will be hampered by lack of standard library support. - -3.3 Remove sender algorithm customization -In this option, we ship everything currently in the Working Draft but remove the ability to customize the algorithms. This gives us a free hand to design a better customization mechanism for C++29 – provided we have confidence that those new customization hooks can be added without break existing behavior. - -This option is not as low-risk as it may seem. Firstly, it is difficult to be confident that algorithm customization can be added back without breaking code. Improved customization hooks have been implemented, and wording for the removal has been written, to the best of the author’s ability, such that that the new hooks can be standardized without breaking changes. - -Secondly, algorithm customizability is a load-bearing feature. Taking it out is not hard but it isn’t trivial either. Customizability is used by the parallel_scheduler to accelerate the bulk family of algorithms. Although the task_scheduler does not currently customize bulk, it should. 
Some design work is necessary before algorithm customization can be removed. - -The section Removing algorithm customization describes the effects and possible remedies of the removal option. - -3.4 Ship everything as-is and fix algorithm customization in a DR -This option is not as reckless as it sounds. We have a fix and the fix has been implemented in a working and publicly available execution library (CCCL). It would not be the first time the Committee shipped a standard with known defects, and the DR process exists for just this purpose. - -One potential problem is that, as DRs go, this one would be large-ish. I do not know if this presents a problem procedurally. If it does, then fixing the problem now would make more sense. Any future DRs are likely to be smaller. - -3.5 Fix algorithm customization now -This is the option this paper proposes. The fix is easy to describe: - -When asking a sender where it will complete, tell it where it will start. -That is done by passing the receiver’s environment when asking the sender for its completion domain. Instead of get_domain(get_env(sndr)), the query would be get_domain(get_env(sndr), get_env(rcvr)) (but with a query other than get_domain, read on). - -That change has some ripple effects, the biggest of which is that the receiver is not known during early customization. Therefore, early customization is irreparably broken and must be removed. - -There are no algorithms in std::execution that are affected by the removal of early customization since they all do their work lazily. Should a future algorithm be added that eagerly connects a sender, that algorithm should accept an optional execution environment by which users can provide the starting domain. That is not onerous. - -There are other ripples from the proposed change. They are described in full detail in section Fixing algorithm customization. - -There are risks with trying to fix the problem now. 
It is a design change happening uncomfortably close to the release of C++26. One mitigating factor is that it is unlikely that the fix would make things more broken than they already are. If there are lingering problems, they could be fixed with the usual DR process. - -This fix has been implemented in NVIDIA’s CCCL library. At the time of writing a second implementation is being developed independently for stdexec, the std::execution reference implementation with an eye to having a few months deployment experience prior to the upcoming London meeting. - -4 Fixing algorithm customization -Selecting the right implementation of an algorithm requires requires two things: - -Identifying the starting and completing domain of the algorithm’s async operation, and - -Using that information to select the preferred implementation for the algorithm that operation represents. - -Let’s take these two separately. - -4.1 Determining the starting and completing domains -As described in Fix algorithm customization now, so-called “early” customization, which determines the return type of then(sndr, fn) for example, is irreparably broken. It needs the sender to know where it will complete, which it can’t in general. - -So the first step is to remove early customization. There is no plan to add it back later. - -That leaves “late” customization, which is performed by the connect customization point. The receiver, which is an extension of caller, knows where the operation will start. If the sender is given this information – that is, if the sender is told where it will start – it can accurately report where it will complete. This is the key insight. - -When connect queries a sender’s attributes for its completion domain, it should pass the receiver’s environment. That way a sender has all available information when computing its completion domain. - -4.1.1 get_completion_domain -It is sometimes the case that a sender’s value and error completions can happen on different domains. 
For example, imagine trying to schedule work on a GPU. If it succeeds, you are in the GPU domain, Bob’s your uncle. If scheduling fails, however, the error cannot be reported on the GPU because we failed to make it there! - -So asking a sender for a singular completion domain is not flexible enough. - -When asking for a completion scheduler, we have three queries, one for each completion disposition: get_completion_scheduler. Similarly, we should have three separate queries for a sender’s completion domain: get_completion_domain. - -ASIDEIf we have the get_completion_scheduler queries, why do we need get_completion_domain? We can ask the completion scheduler for its domain, right? The answer is that there are times when a sender’s completion domain is knowable but the completion scheduler is not. E.g., when_all(s1, s2) completes on the completion scheduler of either s1 or s2, so its completion scheduler is indeterminate. But if s1 and s2 have the same completion domain, then we know that when_all will complete in that domain. - -The addition of the completion domain queries creates a nice symmetry as shown in the table below (with additions in green): - -Receiver -Sender -Query for scheduler get_scheduler get_completion_scheduler -get_completion_scheduler -get_completion_scheduler -Query for domain get_domain get_completion_domain -get_completion_domain -get_completion_domain -For a sender sndr and an environment env, we can get the sender’s completion domain as follows: - -auto completion_domain = get_completion_domain(get_env(sndr), env); -A sender like just() would implement this query as follows: - -struct just_attrs -{ - auto query(get_completion_domain_t, const auto& env) const noexcept - { - // an inline sender completes where it starts. the domain of the environment is where - // the sender will start, so return that. - return get_domain(env); - } - //... -}; - -template -struct just_sender -{ - //... 
- auto get_env() const noexcept - { - return just_attrs{}; - } - //... -}; -NoteA query that accepts an additional argument is novel in std::execution, but the query system was designed to support this usage. See 33.2.2 [exec.queryable.concept]. - -Just as the get_completion_domain queries accept an optional env argument, so too should the get_completion_scheduler queries. - -4.2 Algorithm dispatching in connect -With the addition of the get_completion_domain<*> queries that can accept the receiver’s environment, connect can now know the starting and completing domains of the async operation it is constructing. When passed arguments sndr and rcvr, the starting domain is: - -// Get the operation's starting domain: -auto starting_domain = get_domain(get_env(rcvr)); -To get the completion domain (when the operation completes successfully): - -// Get the operation's completion domain for the value channel: -auto completion_domain = get_completion_domain(get_env(sndr), get_env(rcvr)); -Now connect has all the information it needs to select the correct algorithm implementation. Great! - -But this presents the connect function with a dilemma: how does it use two domains to pick one algorithm implementation? - -Consider that the starting domain might want a say in how start works, and the completing domain might want a say in how set_value works. So should we let the starting domain customize start and the completing domain customize set_value? - -No. start and set_value are bookends around an async operation; they must match. Often set_value needs state that is set up in start. Customizing the two independently is madness. - -4.2.1 Solving the double-dispatch problem -The solution is to use sender transforms. Each domain can apply its transform in turn. I do not have reason to believe the order matters, but it is important that when asked to transform a sender, a domain knows whether it is the “starting” domain or the “completing” domain. 
- -Here is how a domain might customize bulk when it is the completing domain: - -struct thread_pool_domain -{ - template Sndr, class Env> - auto transform_sender(set_value_t, Sndr&& sndr, const Env& env) const - { - //... - } -}; -Since it has set_value_t as its first argument, this transform is only applied when thread_pool_domain is an operation’s completion domain. Had the first argument been start_t, the transform would only be used when thread_pool_domain is a starting domain. - -4.2.2 transform_sender -In this proposed design, the connect CPO does a few things: - -Determines the starting and completing domains, - -Applies the completing domain’s transform (if any), - -Applies the starting domain’s transform (if any) to the resulting sender, - -Connects the twice-transformed sender to the receiver. - -The first three steps are doing something different than connecting a sender and receiver, so it makes sense to factor them out into their own utility. As it so happens we already have such a utility: transform_sender. - -The proposal requires some changes to how transform_sender operates. This new transform_sender still accepts a sender and an environment, but it no longer accepts a domain. It computes the two domains and applies the two transforms, recursing if a transform changes the type of the sender. - -A possible implementation of transform_sender is listed in Appendix A: Listing for updated transform_sender. - -With the definition of transform_sender in Appendix A, connect(sndr, rcvr) is equivalent to transform_sender(sndr, get_env(rcvr)).connect(rcvr) (except rcvr is evaluated only once). - -4.3 Revisiting the problematic example -Let’s see how this new approach addresses the problems noted in the motivating example above.
The troublesome code is: - -namespace ex = std::execution; -auto sndr = ex::starts_on(gpu, ex::just()) | ex::then(fn); -std::this_thread::sync_wait(std::move(sndr)); -An illustrative example describes how the current design and the “fixed” one proposed in [P3718R0] go off the rails while determining the domain in which the function fn will execute, causing it to use a CPU implementation instead of a GPU one. - -In the new design, when the then sender is being connected to sync_wait’s receiver, the starting domain will still be the default_domain, but when asking the sender where it will complete, the answer will be different. Let’s see how: - -When asked for its completion domain, the then sender will ask the starts_on sender where it will complete, as if by: - -auto& [_, fn, child1] = *this; // *this is a then sender - // child1 is a starts_on sender -auto dom1 = ex::get_completion_domain(ex::get_env(child1), ex::get_env(rcvr)); -In turn, the starts_on sender asks the just sender where it will complete, telling it where it will start. (This is the new bit.) It looks like: - -auto& [_, sch, child2] = *this; // *this is a starts_on sender - // child2 is a just sender -// ask for the scheduler's completion domain: -auto sch_dom = ex::get_completion_domain(sch, get_env(rcvr)); -// construct an env that reflects the fact that child2 will be started on sch: -auto env2 = ex::env{ex::prop{ex::get_scheduler, sch}, - ex::prop{ex::get_domain, sch_dom}, - ex::get_env(rcvr)}; -// pass the new env when asking child2 for its completion domain: -auto dom2 = ex::get_completion_domain(ex::get_env(child2), env2); -The just sender, when asked where it will complete, will respond with the domain on which it is started. In other words, get_completion_domain(get_env(just()), env2) will return get_domain(env2), which is sch_dom. 
- -Having correctly determined that the then sender will start on the default domain and complete on the GPU domain, connect can select the right implementation for the then algorithm. It does that by calling: - -return ex::transform_sender(forward(sndr), ex::get_env(rcvr)).connect(forward(rcvr)); -The transform_sender call will execute the following (simplified): - -ex::default_domain().transform_sender( - ex::start, - gpu_domain().transform_sender(ex::set_value, sndr, ex::get_env(rcvr)), - ex::get_env(rcvr)) -where gpu_domain is the domain of the gpu scheduler. The default_domain does not apply any transformation to then senders, so this expression reduces to: - -gpu_domain().transform_sender(ex::set_value, sndr, ex::get_env(rcvr)) -So, in the new customization scheme, the GPU domain gets a crack at transforming the then sender before it is connected to a receiver, as it should. - -4.4 Customizing continues_on and schedule_from -One of the uglier parts of the current algorithm customization design is that it needs special case handling for the continues_on and schedule_from algorithms. The proposed design gives us an opportunity to clean this up significantly. - -4.4.1 Background -In order to transition between two execution contexts, each of which may know nothing about the other, it is necessary to do it in two steps: a transition from a context to the default domain, and a transition from the default domain to another context. A scheduler can customize either or both of these two steps by customizing the continues_on and schedule_from algorithms. - -The customization of continues_on is found using the completion domain of sndr, making it the sanctioned way to transition off of a context. schedule_from finds its customization using the domain of sch, making it useful for transitioning onto a context. - -When not customized, connecting the sender continues_on(sndr, sch) performs a switcheroo and connects schedule_from(sch, sndr) instead. 
In that way, both the source and destination contexts get a say in how the execution transfer is mediated. - -What if a domain wants to customize continues_on? Asking continues_on(sndr, sch) for its completion domain will yield the domain of sch, but continues_on wants to use the completion domain of sndr. The usual transform_sender mechanism does not seem to cut it. - -This is handled in the current draft wording by making continues_on a special case in transform_sender. In [P3718R0], the special casing is replaced with a get_domain_override attribute query, by which the continues_on sender can force transform_sender to use a different domain. - -Both of these solutions are hacks. - -4.4.2 A simplification -A better way to solve this problem is to divide responsibilities differently between continues_on and schedule_from. Suppose that only continues_on transfers execution, and schedule_from does nothing and only exists so it can be customized. The continues_on customization point would look like: - -constexpr pipeable-adaptor continues_on = - [](this auto self, Sndr&& sndr, Sch sch) - { - return make-sender(self, sch, schedule_from(forward(sndr))); - }; -The schedule_from customization point would look like this: - -constexpr auto schedule_from = - [](this auto self, Sndr&& sndr) - { - return make-sender(self, {}, forward(sndr)); - }; -Semantically, schedule_from(sndr) is equivalent to sndr. Crucially, that means that schedule_from(sndr) has the same completion domain as sndr. And that makes schedule_from a great way to customize how to transition off of an execution context. - -On the other hand, continues_on(sndr, sch) completes on the domain of sch, making it a great way to customize how to transition onto an execution context. - -By splitting continues_on and schedule_from in this way, we obviate the need for any special cases or domain overrides. The usual transform_sender mechanism is sufficient. 
- -In the CCCL project, I have implemented this design and ported my CUDA stream scheduler to use it. I needed to customize schedule_from for the CUDA stream scheduler to mediate the execution transfer from the GPU back to the CPU. Besides the bulk algorithms, schedule_from is the only algorithm the GPU scheduler needs to customize. - -NOTE Carving the two algorithms this way flips how they are dispatched. continues_on now dispatches based on the domain of sch, and schedule_from on the completion domain of the predecessor sender. The author believes this is a far more sensible arrangement. - -4.5 inline_scheduler improvements -The suggestion above to extend the get_completion_scheduler<*> query presents an intriguing possibility for the inline_scheduler: the ability for it to report the scheduler on which its scheduling operations complete! - -Consider the sender schedule(inline_scheduler()). Ask it where it completes today and it will say, “I complete on the inline_scheduler.”, which isn’t terribly useful. However, if you ask it, “Where will you complete – and by the way you will be started on the parallel_scheduler?”, now that sender can report that it will complete on the parallel_scheduler. - -The result is that code that uses the inline_scheduler will no longer cause the actual scheduler to be hidden. - -This realization is the motivation behind the change to strike the get_completion_scheduler(get_env(schedule(sch))) requirement from the scheduler concept. We want that expression to be ill-formed for the inline_scheduler. Instead, we want the following query to be well-formed: - -get_completion_scheduler(get_env(schedule(inline_scheduler())), get_env(rcvr)) -That expression should be equivalent to get_scheduler(get_env(rcvr)), which says that the sender of inline_scheduler completes wherever it is started. 
- -4.6 Indeterminate domains -When computing completion domains, it is sometimes the case that an operation can complete on domain A or domain B for a given disposition (value, error, or stopped). Imagine such a sender with an indeterminate completion domain for set_value. How does algorithm customization work in that case? - -First, we recognize that very few algorithms will ever be customized; a given domain may only customize a handful. Given sndr | then(fn), there is no difficulty picking the implementation for then even if sndr can complete successfully on either domain A or B, provided neither domain customizes then. - -That insight makes it advantageous for a sender to report all the domains on which it might complete for a particular completion channel. It can do that with a new domain type: indeterminate_domain, which looks like this: - -template -struct indeterminate_domain -{ - template - static constexpr auto transform_sender(Tag, Sndr&& sndr, const Env& env) - { - // Mandates: for all D in Domains, the expression - // D().transform_sender(Tag(), forward(sndr), env) is either ill-formed or else - // has the same type as - // default_domain().transform_sender(Tag(), forward(sndr), env) - return default_domain().transform_sender(Tag(), forward(sndr), env); - } -}; -Given an environment e, a sender like when_all(sndrs...) would have a value completion domain of - -COMMON-DOMAIN(COMPL-DOMAIN(set_value_t, sndrs, e)...) -where: - -COMPL-DOMAIN(T, S, E) is the type of get_completion_domain(get_env(S), E) if that expression is well-formed, or indeterminate_domain<>() otherwise, and - -COMMON-DOMAIN(Ds...) is common_type_t if that expression is well-formed, and indeterminate_domain otherwise. - -The final piece is to specialize common_type such that indeterminate_domain and indeterminate_domain have a common type of indeterminate_domain, and such that common_type_t, D> is indeterminate_domain. 
- -4.7 The procedure for the fix -The steps for fixing algorithm customization are detailed below. - -Remove the uses of transform_sender in the sender adaptor algorithm customization points (33.9.12 [exec.adapt]). Directly return the result of calling make-sender rather than passing it to transform_sender. - -Remove the exposition-only helpers: - -completion-domain (33.9.2 [exec.snd.expos]/8-9), -get-domain-early (33.9.2 [exec.snd.expos]/13), and -get-domain-late (33.9.2 [exec.snd.expos]/14). -Add the get_completion_domain queries: - -get_completion_domain -get_completion_domain -get_completion_domain -Change the get_completion_scheduler queries to accept an optional environment argument. - -Make the get_domain(env) query smarter by falling back to the current scheduler’s domain if env.query(get_domain) is ill-formed, and falling back further to default_domain() if env does not have a current scheduler. - -Restore the ability of env<...>::query to accept additional arguments. - -Rename the current schedule_from algorithm to continues_on and change it to return make-sender(continues_on, sch, schedule_from(sndr)), where schedule_from is a new algorithm such that schedule_from(sndr) is equivalent to make-sender(schedule_from, {}, sndr). - -Remove the (unused) transform_env function and the transform_env members of the sender algorithm CPOs and from default_domain. - -Change transform_sender from transform_sender(Domain, Sndr, Env...) to transform_sender(Sndr, Env). Have it compute the sender’s starting and completing domains and apply their transforms to Sndr as shown in Appendix A: Listing for updated transform_sender. - -Update the usages of transform_sender in connect and get_completion_signatures to reflect its new signature. - -For the transform_sender member functions in the sender algorithm CPOs, add set_value_t, in the front of their parameter list. Parameterize the transform_sender member in default_domain with a leading Tag parameter. 
- -Add a class template indeterminate_domain as described in Indeterminate domains. - -Update the attributes of the sender algorithms to properly report their completion schedulers and completion domains given an optional env argument. Also update the inline_scheduler and its schedule-sender to compute their completion scheduler and domain from the extra env argument. - -From the scheduler concept, replace the required expression - -{ auto(get_completion_scheduler(get_env(schedule(std::forward(sch))))) } - -> same_as>; -with a semantic requirement that if the above expression is well-formed – which it is for the parallel_scheduler, the task_scheduler, and run_loop’s scheduler – then it shall compare equal to sch. (See inline_scheduler improvements for the motivation behind these changes.) - -Simplify the on algorithm, which no longer needs to use write_env to make child operations aware of the current scheduler and domain. - -4.8 Proposed wording, option 1: Fix algorithm customization -[ Editor's note: Introduce the notion of “execution domains” by changing 33.3 [exec.async.ops#1] as follows: ] - -[ Editor's note: In 33.4 [execution.syn], make the following changes: ] - -… as before … - -namespace std::execution { - // [exec.queries], queries - struct get_domain_t { unspecified }; - struct get_scheduler_t { unspecified }; - struct get_delegation_scheduler_t { unspecified }; - struct get_forward_progress_guarantee_t { unspecified }; - template - struct get_completion_scheduler_t { unspecified }; - template - struct get_completion_domain_t { unspecified }; - struct get_await_completion_adaptor_t { unspecified }; - - inline constexpr get_domain_t get_domain{}; - inline constexpr get_scheduler_t get_scheduler{}; - inline constexpr get_delegation_scheduler_t get_delegation_scheduler{}; - enum class forward_progress_guarantee; - inline constexpr get_forward_progress_guarantee_t get_forward_progress_guarantee{}; - template - constexpr get_completion_scheduler_t 
get_completion_scheduler{}; - template - constexpr get_completion_domain_t get_completion_domain{}; - inline constexpr get_await_completion_adaptor_t get_await_completion_adaptor{}; - - struct get_env_t { unspecified }; - inline constexpr get_env_t get_env{}; - - template - using env_of_t = decltype(get_env(declval())); - - // [exec.prop], class template prop - template - struct prop; - - // [exec.env], class template env - template - struct env; - - // [exec.domain.indeterminate], execution domains - template - struct indeterminate_domain; - - // [exec.domain.default], execution domains - struct default_domain; - -… as before … - - template - using tag_of_t = see below; - - // [exec.snd.transform], sender transformations - template - requires (sizeof...(Env) <= 1) - constexpr sender decltype(auto) transform_sender( - Domain dom, Sndr&& sndr, const Env&... env) noexcept(see below); - - // [exec.snd.transform.env], environment transformations - template - constexpr queryable decltype(auto) transform_env( - Domain dom, Sndr&& sndr, Env&& env) noexcept; - - // [exec.snd.apply], sender algorithm application - template - constexpr decltype(auto) apply_sender( - Domain dom, Tag, Sndr&& sndr, Args&&... args) noexcept(see below); - - // [exec.connect], the connect sender algorithm - struct connect_t; - inline constexpr connect_t connect{}; - -… as before … -[ Editor's note: Before subsection 33.5.1 [exec.fwd.env], insert a new subsection with stable name [exec.queries.expos] as follows: ] - -4.8.1 Query utilities [exec.queries.expos] -[exec.queries] makes use of the following exposition-only entities. - -For subexpressions q and tag and pack args, let TRY-QUERY(q, tag, args...) be expression-equivalent to AS-CONST(q).query(tag, args...) if that expression is well-formed, and AS-CONST(q).query(tag) otherwise. - -For subexpressions q and tag and pack args, let HIDE-SCHED(q) be an object o such that o.query(tag, args...) 
is ill-formed when the decayed type of tag is get_scheduler_t or get_domain_t, and o.query(tag, args...) otherwise. - -[ Editor's note: Change [exec.get.domain] as follows: ] - -execution​::​get_domain [exec.get.domain] -get_domain asks a queryable object for its associated execution domain tag. - -The name get_domain denotes a query object. For a subexpression env, get_domain(env) is expression-equivalent to MANDATE-NOTHROW(D()), where D is the type of the first of the following expressions that is well-formed [ Editor's note: Reformatted as a list. ] - -(2.1)MANDATE-NOTHROW(auto(AS-CONST(env).query(get_domain)) -(2.2)get_completion_domain(get_scheduler(env), HIDE-SCHED(env)) - -(2.3)default_domain() - -forwarding_query(execution​::​get_domain) is a core constant expression and has value true. - -[ Editor's note: Change subsection 33.5.6 [exec.get.scheduler] as follows: ] - -get_scheduler asks a queryable object for its associated scheduler. - -The name get_scheduler denotes a query object. For a subexpression env, get_scheduler(env) is expression-equivalent to - -get_completion_scheduler(MANDATE-NOTHROW(AS-CONST(env).query(get_scheduler)), HIDE-SCHED(env)) -Mandates: If the expression above is well-formed, its type satisfies scheduler. - -forwarding_query(execution​::​get_scheduler) is a core constant expression and has value true. - -[ Editor's note: Change subsection 33.5.9 [exec.get.compl.sched] as follows: ] - -execution​::​get_completion_scheduler [exec.get.compl.sched] -get_completion_scheduler obtains the completion scheduler associated with a completion tag from a sender’s attributes. -For subexpression sch1 and pack envs, let sch2 be TRY-QUERY(sch, get_completion_scheduler, envs...) and let QUERY-RECURSE(sch1, envs...) be expression-equivalent to QUERY-RECURSE(sch2, envs...) if that expression is well-formed and sch1 and sch2 have different types or compare unequal; otherwise, sch. -The name get_completion_scheduler denotes a query object template. 
For a subexpression q and pack envs, the expression get_completion_scheduler(q, envs...) is ill-formed if completion-tag is not one of set_value_t, set_error_t, or set_stopped_t. Otherwise, get_completion_scheduler(q, envs...) is expression-equivalent to - -MANDATE-NOTHROW(AS-CONST(q).query(get_completion_scheduler)) -(3.1)MANDATE-NOTHROW(QUERY-RECURSE(TRY-QUERY(q, get_completion_scheduler, envs...), envs...)) if that expression is well-formed. - -(3.2)Otherwise, auto(q) if the type of q satisfies scheduler and sizeof...(envs) != 0 is true. - -(3.3)Otherwise, get_completion_scheduler(q, envs...) is ill-formed. - -Mandates: If the expression above get_completion_scheduler(q, envs...) is well-formed, its type satisfies scheduler. - -For a type Tag, subexpression sndr, and pack env, let CS be completion_signatures_of_t, decltype((env))...>. If both get_completion_scheduler(get_env(sndr), env...) and CS are well-formed and CS().count-of(Tag()) == 0 is true, the program is ill-formed. -Let completion-fn be a completion function ([exec.async.ops]); let completion-tag be the associated completion tag of completion-fn; let args and envs be packs of subexpressions; and let sndr be a subexpression such that sender is true and get_completion_scheduler(get_env(sndr), envs...) is well-formed and denotes a scheduler sch. If an asynchronous operation created by connecting sndr with a receiver rcvr causes the evaluation of completion-fn(rcvr, args...), the behavior is undefined unless the evaluation happens on an execution agent that belongs to sch’s associated execution resource. - -The expression forwarding_query(get_completion_scheduler) is a core constant expression and has value true. - -[ Editor's note: After subsection 33.5.9 [exec.get.compl.sched], add a new subsection with stable name [exec.get.compl.domain] as follows.
] - -execution​::​get_completion_domain [exec.get.compl.domain] -get_completion_domain obtains the completion domain associated with a completion tag from a sender’s attributes. - -The name get_completion_domain denotes a query object template. For a subexpression o and pack env, the expression get_completion_domain(o, env...) is ill-formed if completion-tag is not one of set_value_t, set_error_t, or set_stopped_t. Otherwise, get_completion_domain(o, env...) is expression-equivalent to MANDATE-NOTHROW(D()), where D is the type of the first of the following expressions that is well-formed - -(2.1)TRY-QUERY(o, get_completion_domain, env...) - -(2.2)TRY-QUERY(get_completion_scheduler(o, env...), get_completion_domain, env...) - -If none of the above expressions is well-formed, the expression get_completion_domain(o, env...) is ill-formed. - -For a type Tag, subexpression sndr, and pack env, let CS be completion_signatures_of_t, decltype((env))...>. If both get_completion_domain(get_env(sndr), env...) and CS are well-formed and CS().count-of(Tag()) == 0 is true, the program is ill-formed. - -Let completion-fn be a completion function ([exec.async.ops]); let completion-tag be the associated completion tag of completion-fn; let args and env be packs of subexpressions; and let sndr be a subexpression such that sender is true and get_completion_domain(get_env(sndr), env...) is well-formed and denotes a domain d. If an asynchronous operation created by connecting sndr with a receiver rcvr causes the evaluation of completion-fn(rcvr, args...), the behavior is undefined unless the evaluation happens on an execution agent of an execution resource whose associated execution domain tag is d. - -The expression forwarding_query(get_completion_domain) is a core constant expression and has value true. - -[ Editor's note: In 33.6 [exec.sched], change paragraphs 1, 5, and 6 as follows: ] - -The scheduler concept defines the requirements of a scheduler type (33.3 [exec.async.ops]). 
schedule is a customization point object that accepts a scheduler. A valid invocation of schedule is a schedule-expression. - -namespace std::execution { - template - concept scheduler = - derived_from::scheduler_concept, scheduler_t> && - queryable && - requires(Sch&& sch) { - { schedule(std::forward(sch)) } -> sender; - { auto(get_completion_scheduler( - get_env(schedule(std::forward(sch))))) } - -> same_as>; - } && - equality_comparable> && - copyable>; -} -… as before … - -For a given scheduler expression sch, if the expression get_completion_scheduler(get_env(schedule(sch))) is well-formed, it shall compare equal to sch. - -For a given scheduler expression sch and pack of subexpressions env, if the expression get_completion_domain(sch, env...) is well-formed, then the expression get_completion_domain(get_env(schedule(sch)), env...) is also well-formed and has the same type. - -For a given scheduler expression sch and pack of subexpressions env, if the expression get_completion_scheduler(sch, env...) is well-formed, then the expression get_completion_scheduler(get_env(schedule(sch)), env...) is also well-formed and has the same type and value. -[ Editor's note: Change 33.9.2 [exec.snd.expos] paragraph 3 as follows: ] - -For a query object q, a subexpression v, and a pack of subexpressions as, MAKE-ENV(q, v) is an expression env whose type satisfies queryable such that the result of env.query(q, as...) has a value equal to v (18.2 [concepts.equality]). Unless otherwise stated, the object to which env.query(q, as...) refers remains valid while env remains valid. -[ Editor's note: Before 33.9.2 [exec.snd.expos] paragraph 6, add two new paragraphs as follows: ] - -For a pack of subexpressions domains, COMMON-DOMAIN(domains...) is expression-equivalent to common_type_t() if that expression is well-formed, and indeterminate_domain() otherwise, where Ds is the pack of types consisting of decltype(auto(domains))... with duplicate types removed.
- -For type Tag, subexpression, sndr and pack env, COMPL-DOMAIN(Tag, sndr, env) is expression-equivalent to D() where D is the type of get_completion_domain(get_env(sndr), env...) if that expression is well-formed or if sizeof...(env) == 0 is true, and indeterminate_domain() otherwise. - -[ Editor's note: Replace 33.9.2 [exec.snd.expos] paragraph 6 about SCHED-ATTRS and SCHED-ENV with the following paragraphs: ] - -For unqualified type Tag and subexpressions sch, sndr, and pack as, SCHED-ATTRS(sch, sndr) is an expression o whose type satisfies queryable such that o.query(Tag(), as...) is expression-equivalent to: - -(8.1)Tag()(sch, as...) if Tag is a specialization of get_completion_scheduler_t or get_completion_domain_t and either of the following is true: - -(8.1.1)The template parameter of Tag is set_value_t, or - -(8.1.2)never-completes-with is true, where completion-tag is the template parameter of Tag, and Sndr is decay_t. - -(8.2)D() if Tag is get_completion_domain_t, where D is the type of the expression - -COMMON-DOMAIN(COMPL-DOMAIN(completion-tag, sndr, as...), - COMPL-DOMAIN(completion-tag, schedule(sch), FWD-ENV(as)...)) -(8.3)Otherwise, FWD-ENV(get_env(sndr)).query(Tag(), as...) if either of the following is true: - -(8.3.1)Tag is not a specialization of get_completion_scheduler_t or get_completion_domain_t, or - -(8.3.2)never-completes-with, completion-tag, decltype(as)...> is true, where completion-tag is the template parameter of Tag. - -(8.4)Otherwise, the expression o.query(Tag(), as...) is ill-formed. - -For a scheduler sch, SCHED-ENV(sch) is an expression o whose type satisfies queryable such that - -(9.1)o.query(get_scheduler) is a prvalue with the same type and value as sch, - -(9.2)o.query(get_domain) is expression-equivalent to sch.query(get_domain) if that expression is well-formed, and default_domain() otherwise. 
- -[ Editor's note: Remove the prototype of the exposition-only completion-domain function just before 33.9.2 [exec.snd.expos] paragraph 8, and with it remove paragraphs 8 and 9, which specify the function’s behavior. ] - -[ Editor's note: Remove 33.9.2 [exec.snd.expos] paragraphs 13 and 14 and the prototypes for the get-domain-early and get-domain-late functions. ] - -[ Editor's note: After 33.9.2 [exec.snd.expos] paragraph 47 (not-a-sender), add the following new paragraph ] - -struct not-a-scheduler { - using scheduler_concept = scheduler_t; - - constexpr auto schedule() const noexcept { - return not-a-sender(); - } -}; -[ Editor's note: Add the following new paragraphs after 33.9.2 [exec.snd.expos] paragraph 50 as follows: ] - -template - concept never-completes-with = requires { - requires (0 == get_completion_signatures().count-of(SetTag())); - }; -template - constexpr auto call-with-default(Fn&& fn, Default&& value, Args&&... args) noexcept(see below); -Let e be the expression std::forward(fn)(std::forward(args)...) if that expression is well-formed; otherwise, it is static_cast(std​::​forward(value)). - -Returns: e. - -Remarks: The expression in the noexcept clause is noexcept(e). - -template - struct inline-attrs { - see below - }; -For a subexpression env, inline-attrs{}.query(get_completion_scheduler, env) is expression-equivalent to get_scheduler(env). - -For a subexpression env, inline-attrs{}.query(get_completion_domain, env) is expression-equivalent to get_domain(env). - -[ Editor's note: Add a new subsection before [exec.domain.default] with stable name [exec.domain.indeterminate] as follows: ] - -33.9.? 
execution::indeterminate_domain [exec.domain.indeterminate] -namespace std::execution { - template - struct indeterminate_domain { - indeterminate_domain() = default; - constexpr indeterminate_domain(auto&&) noexcept {} - - template - static constexpr sender decltype(auto) transform_sender(Tag, Sndr&& sndr, const Env& env) - noexcept(see below); - }; -} -template - static constexpr sender decltype(auto) transform_sender(Tag, Sndr&& sndr, const Env& env) - noexcept(see below); -Mandates: For each type D in Domains..., the expression D().transform_sender(Tag(), std::forward(sndr), env) is either ill-formed or has the same decayed type as default_domain().transform_sender(Tag(), std::forward(sndr), env). - -Returns: default_domain().transform_sender(Tag(), std::forward(sndr), env) - -Remarks: For a pack of types Ds, common_type_t, indeterminate_domain> is indeterminate_domain where Us is the pack of types in Domains..., Ds... except with duplicate types removed. For a type D that is not a specialization of indeterminate_domain, common_type_t, D> is D if sizeof...(Domains) == 0 is true, and common_type_t, indeterminate_domain> otherwise. - -[ Editor's note: Change 33.9.5 [exec.domain.default] as follows: ] - -33.9.5 execution​::​default_domain [exec.domain.default] -namespace std::execution { - struct default_domain { - template - requires (sizeof...(Env) <= 1) - static constexpr sender decltype(auto) transform_sender(Tag, Sndr&& sndr, const Env&... env) - noexcept(see below); - - template - static constexpr queryable decltype(auto) transform_env(Sndr&& sndr, Env&& env) noexcept; - - template - static constexpr decltype(auto) apply_sender(Tag, Sndr&& sndr, Args&&... args) - noexcept(see below); - }; -} -template - requires (sizeof...(Env) <= 1) - static constexpr sender decltype(auto) transform_sender(Tag, Sndr&& sndr, const Env&... env) - noexcept(see below); -Let e be the expression - -tag_of_t().transform_sender(Tag(),std::forward(sndr), env...) 
-
-if that expression is well-formed; otherwise, static_cast(std​::​forward(sndr)). [ Editor's note: See https://cplusplus.github.io/LWG/issue4368 ]
-
-Returns: e.
-
-Remarks: The exception specification is equivalent to noexcept(e).
-
-template
-  constexpr queryable decltype(auto) transform_env(Sndr&& sndr, Env&& env) noexcept;
-Let e be the expression
-
-tag_of_t().transform_env(std::forward(sndr), std::forward(env))
-if that expression is well-formed; otherwise, FWD-ENV(std​::​forward(env)).
-
-Mandates: noexcept(e) is true.
-
-Returns: e.
-
-template
-constexpr decltype(auto) apply_sender(Tag, Sndr&& sndr, Args&&... args)
-  noexcept(see below);
-Let e be the expression
-
-Tag().apply_sender(std::forward(sndr), std::forward(args)...)
-Constraints: e is a well-formed expression.
-
-Returns: e.
-
-Remarks: The exception specification is equivalent to noexcept(e).
-
-[ Editor's note: Change 33.9.6 [exec.snd.transform] as follows: ]
-
-execution::transform_sender [exec.snd.transform]
-namespace std::execution {
-  template
-    requires (sizeof...(Env) <= 1)
-    constexpr sender decltype(auto) transform_sender(Domain dom, Sndr&& sndr, const Env&... env)
-      noexcept(see below);
-}
-For a subexpression s, let domain-for(start, s) be D() where D is the decayed type of get_domain(env) if that expression is well-formed, and default_domain otherwise.
-
-Let domain-for(set_value, s) be D() where D is the decayed type of get_completion_domain(get_env(sndr), env) if that is well-formed, and default_domain otherwise.
-
-Let transformed-sndr(dom, tag, s) be the expression
-
-dom.transform_sender(std::forward(sndr), env...)
-dom.transform_sender(tag, s, env)
-if that expression is well-formed; otherwise,
-
-default_domain().transform_sender(std::forward(sndr), env...)
-
-default_domain().transform_sender(tag, s, env)
-Let final-sndr transform-recurse(dom, tag, s) be the expression transformed-sndr(dom, tag, s) if transformed-sndr(dom, tag, s) and sndr s have the same type ignoring cv-qualifiers; otherwise, it is the expression transform_sender(dom, transformed-sndr, env...) transform-recurse(dom2, tag, s2) where s2 is transformed-sndr(dom, tag, s) and dom2 is domain-for(tag, s2).
-
-Let tmp-sndr be the expression
-
-transform-recurse(domain-for(set_value, sndr), set_value, sndr)
-and let final-sndr be the expression
-
-transform-recurse(domain-for(start, tmp-sndr), start, tmp-sndr)
-Returns: final-sndr.
-
-Remarks: The exception specification is equivalent to noexcept(final-sndr).
-
-[ Editor's note: Remove section 33.9.7 [exec.snd.transform.env]. ]
-
-[ Editor's note: Change 33.9.9 [exec.getcomplsigs] paragraph 1 as follows: ]
-
-template
-  consteval auto get_completion_signatures() -> valid-completion-signatures auto;
-Let except be an rvalue subexpression of an unspecified class type Except such that move_constructible && derived_from is true. Let CHECKED-COMPLSIGS(e) be e if e is a core constant expression whose type satisfies valid-completion-signatures; otherwise, it is the following expression: (e, throw except, completion_signatures()) Let get-complsigs() be expression-equivalent to remove_reference_t​::​template get_completion_signatures(). Let NewSndr be Sndr if sizeof...(Env) == 0 is true; otherwise, decltype(s) where s is the following expression:
-
-transform_sender(
-  get-domain-late(declval(), declval()...),
-  declval(),
-  declval()...)
-[ Editor's note: Change 33.9.10 [exec.connect] paragraph 2 as follows: ]
-
-The name connect denotes a customization point object. 
For subexpressions sndr and rcvr, let Sndr be decltype((sndr)) and Rcvr be decltype((rcvr)), let new_sndr be the expression - -transform_sender(decltype(get-domain-late(sndr, get_env(rcvr))){}, sndr, get_env(rcvr)) -and let DS and DR be decay_t and decay_t, respectively. - -[ Editor's note: Remove 33.9.11.1 [exec.schedule] paragraph 4 as follows: ] - -If the expression - -get_completion_scheduler(get_env(sch.schedule())) == sch -is ill-formed or evaluates to false, the behavior of calling schedule(sch) is undefined. - -[ Editor's note: Change 33.9.11.2 [exec.just] paragraph 2 as follows: ] - -The names just, just_error, and just_stopped denote customization point objects. Let just-cpo be one of just, just_error, or just_stopped. For a pack of subexpressions ts, let Ts be the pack of types decltype((ts)). The expression just-cpo(ts...) is ill-formed if - -(2.1)(movable-value &&...) is false, or - -(2.2)just-cpo is just_error and sizeof...(ts) == 1 is false, or - -(2.3)just-cpo is just_stopped and sizeof...(ts) == 0 is false. - -Otherwise, it is expression-equivalent to make-sender(just-cpo, product-type{ts...}). - -For just, just_error, and just_stopped, let set-cpo be set_value, set_error, and set_stopped, respectively. 
The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for just-cpo as follows: - -namespace std::execution { - template<> - struct impls-for> : default-impls { - static constexpr auto get-attrs = - [](const auto& data) noexcept -> inline-attrs> { - return {}; - }; - - static constexpr auto start = - [](auto& state, auto& rcvr) noexcept -> void { - auto& [...ts] = state; - set-cpo(std::move(rcvr), std::move(ts)...); - }; - }; -} -[ Editor's note: Change 33.9.11.3 [exec.read.env] paragraph 3 as follows: ] - -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for read_env as follows: - -namespace std::execution { - template<> - struct impls-for> : default-impls { - static constexpr auto get-attrs = - [](const auto& data) noexcept -> inline-attrs { - return {}; - }; - - static constexpr auto start = - [](auto query, auto& rcvr) noexcept -> void { - TRY-SET-VALUE(rcvr, query(get_env(rcvr))); - }; - }; - - template - static consteval void check-types(); -} -[ Editor's note: Change 33.9.12.5 [exec.starts.on] paragraphs 3 and 4, and insert a new paragraph 5 as follows: ] - -Otherwise, the expression starts_on(sch, sndr) is expression-equivalent to: - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(starts_on, sch, sndr)) -except that sch is evaluated only once. - -Let out_sndr and env be subexpressions such that OutSndr is decltype((out_sndr)). 
If sender-for is false, then the expressions starts_on.transform_env(out_sndr, env) and starts_on.transform_sender(set_value,out_sndr, env) areis ill-formed; otherwise - -(4.1)starts_on.transform_env(out_sndr, env) is equivalent to: -auto&& [_, sch, _] = out_sndr; -return JOIN-ENV(SCHED-ENV(sch), FWD-ENV(env)); -(4.2)starts_on.transform_sender(set_value,out_sndr, env) is equivalent to: -auto&& [_, sch, sndr] = out_sndr; -return let_value( - schedule(sch), - continues_on(just(), sch), - [sndr = std::forward_like(sndr)]() mutable - noexcept(is_nothrow_move_constructible_v>) { - return std::move(sndr); - }); -The exposition-only class template impls-for is specialized for starts_on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = - [](const auto& sch, const auto& child) noexcept -> decltype(auto) { - return see below; - }; - }; -} -For subexpressions sch and child and pack of subexpressions env, let sch2 be get_completion_scheduler(sch, env...), and let env2 be the pack JOIN-ENV(SCHED-ENV(sch2), env). impls-for::get-attrs(sch, child) returns an object attrs such that - -(5.1)attrs.query(get_completion_scheduler, env...) is expression-equivalent to get_completion_scheduler(get_env(child), env2...). - -(5.2)attrs.query(get_completion_domain, env...) is expression-equivalent to get_completion_domain(get_env(child), env2...). - -(5.3)For a subexpression q and pack args, attrs.query(q, args...) is expression-equivalent to FWD-ENV(get_env(child)).query(q, args...) where the type of q is not a specialization of either get_completion_scheduler_t or get_completion_domain_t. 
- -[ Editor's note: Remove subsection 33.9.12.6 [exec.continues.on] ] - -[ Editor's note: Change stable name 33.9.12.7 [exec.schedule.from] to [exec.continues.on], and change the subsection as follows: ] - -33.9.12.76 execution::schedule_fromcontinues_on [exec.schedule.from.continues.on] - -schedule_fromcontinues_on schedules work dependent on the completion of a sender onto a scheduler’s associated execution resource. - -[Note 1: schedule_from is not meant to be used in user code; it is used in the implementation of continues_on. — end note] - -The name schedule_fromcontinues_on denotes a customization point object. For some subexpressions sch and sndr, let Sch be decltype((sch)) and Sndr be decltype((sndr)). If Sch does not satisfy scheduler, or Sndr does not satisfy sender, schedule_from(sch, sndr)continues_on(sndr, sch) is ill-formed. - -Otherwise, the expression schedule_from(sch, sndr)continues_on(sndr, sch) is expression-equivalent to: make-sender(continues_on, sch, schedule_from(sndr)) - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(schedule_from, sch, sndr)) -except that sch is evaluated only once. 
- -The exposition-only class template impls-for (33.9.1 [exec.snd.general]) is specialized for schedule_from_tcontinues_on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - - template - static consteval void check-types(); - }; -} -The member impls-for​::​get-attrs is initialized with a callable object equivalent to the following lambda: - -[](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); - return SCHED-ATTRS(data, child); -} -The member impls-for​::​get-state is initialized with a callable object equivalent to the following lambda: - -… as before … - -… as before … - -The member impls-for​::complete is initialized with a callable object equivalent to the following lambda: - -… as before … - -Let out_sndr be a subexpression denoting a sender returned from schedule_from(sch, sndr)continues_on(sndr, sch) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be … as before … - -[ Editor's note: After 33.9.12.6 [exec.continues.on], add a new subsection with stable name [exec.schedule.from] as follows: ] - -execution::schedule_from [exec.schedule.from] -schedule_from offers scheduler authors a way to customize how to transition off of their schedulers’ associated execution contexts. - -[ Note:  schedule_from is not meant to be used in user code; it is used in the implementation of continues_on. — end note ] - -The name schedule_from denotes a customization point object. For some subexpression sndr, if decltype(sndr) does not satisfy sender, schedule_from(sndr) is ill-formed. - -Otherwise, the expression schedule_from(sndr) is expression-equivalent to make-sender(schedule_from, {}, sndr). 
- -[ Editor's note: Change 33.9.12.8 [exec.on] as follows: ] - -execution::on [exec.on] -The on sender adaptor has two forms … as before … - -The name on denotes a … as before … - -Otherwise, if decltype((sndr)) satisfies sender, the expression on(sch, sndr) is expression-equivalent to: - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(on, sch, sndr)) -except that sch is evaluated only once. - -For subexpressions sndr, sch, and closure, if - -(4.1)decltype((sch)) does not satisfy scheduler, or -(4.2)decltype((sndr)) does not satisfy sender, or -(4.3)closure is not a pipeable sender adaptor closure object (33.9.12.2 [exec.adapt.obj]), -the expression on(sndr, sch, closure) is ill-formed; otherwise, it is expression-equivalent to: - -transform_sender( - get-domain-early(sndr), - make-sender(on, product-type{sch, closure}, sndr)) -except that sndr is evaluated only once. - -Let out_sndr and env be subexpressions, let OutSndr be decltype((out_sndr)), and let Env be decltype((env)). If sender-for is false, then the expressions on.transform_env(out_sndr, env) and on.transform_sender(set_value,out_sndr, env) areis ill-formed. - -Otherwise: Let not-a-scheduler be an unspecified empty class type. 
- -The expression on.transform_env(out_sndr, env) has effects equivalent to: - -auto&& [_, data, _] = out_sndr; -if constexpr (scheduler) { - return JOIN-ENV(SCHED-ENV(std::forward_like(data)), FWD-ENV(std::forward(env))); -} else { - return std::forward(env); -} -Otherwise, Tthe expression on.transform_sender(set_value,out_sndr, env) has effects equivalent to: - -auto&& [_, data, child] = out_sndr; -if constexpr (scheduler) { - auto orig_sch = - query-with-default(get_scheduler, env, not-a-scheduler()); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return continues_on( - starts_on(std::forward_like(data), std::forward_like(child)), - std::move(orig_sch)); - } -} else { - auto& [sch, closure] = data; - auto orig_sch = query-with-default( - get_completion_scheduler, - get_env(child), - query-with-default(get_scheduler, env, not-a-scheduler())); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return write_env( - continues_on( - std::forward_like(closure)( - continues_on( - write_env(std::forward_like(child), SCHED-ENV(orig_sch)), - sch)), - orig_sch), - SCHED-ENV(sch)); - } -} -auto&& [_, data, child] = out_sndr; -if constexpr (scheduler) { - auto orig_sch = - call-with-default(get_scheduler, not-a-scheduler(), env); - - return continues_on( - starts_on(std::forward_like(data), std::forward_like(child)), - std::move(orig_sch)); -} else { - auto& [sch, closure] = data; - auto orig_sch = call-with-default( - get_completion_scheduler, not-a-scheduler(), get_env(child), env); - - return continues_on( - std::forward_like(closure)(continues_on(std::forward_like(child), sch)), - orig_sch); -} -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = - [](const auto& data, const auto& child) noexcept { - return see below; - }; - }; -} -Given a pack of subexpressions env, 
let Env be the pack decltype(env), let Data be the decayed type of data, let sndr be a const lvalue reference to the basic-sender object that owns the objects to which data and child refer, and let new_attrs be a queryable object equal to get_env(on.transform_sender(set_value, sndr_copy, env...)) where sndr_copy is a hypothetical non-const object equal to sndr. The above lambda returns an object attrs such that, for a query object q and pack of subexpressions as, attrs.query(q, as...) is ill-formed if new_attrs.query(q, as...) is ill-formed; otherwise, it has the same type and value as new_attrs.query(q, as...). - -Recommended practice: Implementations should compute the results of queries without actually calling on_t::transform_sender. - -[ Editor's note: Change 33.9.12.9 [exec.then] paragraphs 3 and 4 and add a new paragraph as follows: ] - -Otherwise, the expression then-cpo(sndr, f) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), make-sender(then-cpo, f, sndr)) -except that sndr is evaluated only once. - -For then, upon_error, and upon_stopped, let set-cpo be set_value, set_error, and set_stopped, respectively. The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for then-cpo as follows: - -namespace std::execution { - template<> - struct impls-for> : default-impls { - static constexpr auto get-attrs = - [](const auto& data, const auto& child) { return see below; }; - - static constexpr auto complete = … as before …; - - template - static consteval void check-types(); - }; -} -Let q be a query object and let as be a pack of subexpressions. For subexpressions child and fn, impls-for>::get-attrs(fn, child) returns an object o whose type satisfies queryable such that o.query(q, as...) 
is expression-equivalent to: - -(5.1)D() if q is get_completion_domain, where D is the type of the expression - -COMMON-DOMAIN(COMPL-DOMAIN(decayed-typeof, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_value_t, child, FWD-ENV(as)...)) -(5.2)D() if q is get_completion_domain and gather-signatures, CS, is-nothrow, conjunction>::value is false, where CS is completion_signatures_of_t, FWD-ENV-T(decltype((as)))...>; is-nothrow is an alias template such that for a pack of types Ts, is-nothrow is is_nothrow_invocable, Ts...>; and D is the type of the expression - -COMMON-DOMAIN(COMPL-DOMAIN(decayed-typeof, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_error_t, child, FWD-ENV(as)...)) -(5.3)Otherwise, FWD-ENV(get_env(child)).query(q, as...) if either of the following is true: - -(5.2.1)q is get_completion_scheduler and set-cpo is set_value, or - -(5.2.2)q is not get_completion_domain> or get_completion_scheduler>. - -(5.4)Otherwise, o.query(q, as...) is ill-formed. - -[ Editor's note: Change 33.9.12.10 [exec.let] as follows: ] - -let_value, let_error, and let_stopped … as before … - -For let_value, let_error, and let_stopped, let set-cpo be set_value, set_error, and set_stopped, respectively. Let the expression let-cpo be one of let_value, let_error, or let_stopped. For a subexpressions sndr and env, let let-env(sndr, env) be expression-equivalent to the first well-formed expression below: - -(2.1)SCHED-ENV(get_completion_scheduler>(get_env(sndr), FWD-ENV(env))) -(2.2)MAKE-ENV(get_domain, get_domain(get_env(sndr))) -(2.2)MAKE-ENV(get_domain, get_completion_domain>(get_env(sndr), FWD-ENV(env))) -(2.3)(void(sndr), env<>{}) -The names let_value, let_error, and let_stopped denote … as before … - -Otherwise, the expression let-cpo(sndr, f) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), make-sender(let-cpo, f, sndr)) -except that sndr is evaluated only once. 
- -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for let-cpo as follows: - -namespace std::execution { - template - void let-bind(State& state, Rcvr& rcvr, Args&&... args); // exposition only - - template<> - struct impls-for> : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - - template - static consteval void check-types(); - }; -} -Let receiver2 denote the following exposition-only class template: - -namespace std::execution { - template - struct receiver2 { - … as before … - }; -} -Invocation of the function receiver2​::​get_env returns an object e such that - -(6.1)decltype(e) models queryable and - -(6.2)given a query object q and pack of subexpressions as, the expression e.query(q, as...) is expression-equivalent to env.query(q, as...) if that expression is valid; otherwise, if the type of q satisfies forwarding-query, e.query(q, as...) is expression-equivalent to get_env(rcvr).query(q, as...); otherwise, e.query(q, as...) is ill-formed. - -Effects: Equivalent to: - -… as before … - -where env-t is the pack decltype(let-cpo.transform_env(declval(), declval())) decltype(JOIN-ENV(let-env(declval>(), declval()), FWD-ENV(declval()))). - -impls-for>::get-attrs is initialized with a callable object equivalent to the following: - -[](const auto& fn, const auto& child) { - return see below; -} -Let SetCPO be the decayed type of set-cpo, let Fn be the decayed type of fn, and let Sndrs be a pack of the parameters of the tuple specialization named by gather-signatures, where CS is completion_signatures_of_t, FWD-ENV-T(decltype((as)))...> and invoke-result is an alias template such that invoke-result is invoke_result_t for a pack Ts. 
The lambda above returns an object o whose type satisfies queryable such that, for a query object q and pack of subexpressions as, let let_env be the pack JOIN-ENV(let-env(child, as), FWD-ENV(as)). Then o.query(q, as...) is as follows: - -(8.1)If q is get_completion_domain, o.query(q, as...) is expression-equivalent to D() where D is the type of the expression: - -COMMON-DOMAIN(COMPL-DOMAIN(SetCPO, declval(), let_env...)...) -(8.2)Otherwise, if q is get_completion_domain and gather-signatures::value is well-formed and false, o.query(q, as...) is ill-formed if sizeof...(as) == 0 is true; otherwise it is expression-equivalent to D(), where is-nothrow is the alias template - -template - using is-nothrow = bool_constant< - (noexcept(auto(declval())) &&...) && - noexcept(connect(invoke(declval(), declval&>()...), - receiver-archetype()))>; -where receiver-archetype is an empty class type such that receiver_of is true for any specialization of completion_signatures U and such that the type of receiver-archetype().get_env() is decltype((as...[0])) [ Editor's note: This assumes the adoption of [P3388R2], forwarded to LWG during the Kona 2025 meeting. ]; and where D is the type of the following expression: - -COMMON-DOMAIN(COMPL-DOMAIN(SetCPO, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_error_t, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_error_t, declval(), let_env...)...) -(8.3)Otherwise, if q is get_completion_domain or get_completion_domain, let Tag be the template parameter of the type of q. Then o.query(q, as...) is D() where D() is the type of the following expression: - -COMMON-DOMAIN(COMPL-DOMAIN(Tag, child, FWD-ENV(as)...), - COMPL-DOMAIN(Tag, declval(), let_env...)...) 
-impls-for>::get-state is initialized with a callable object equivalent to the following: - -[](Sndr&& sndr, Rcvr& rcvr) requires see below { - auto& [_, fn, child] = sndr; - using fn_t = decay_t; - using env_t = decltype(let-env(child, get_env(rcvr))); - using args_variant_t = see below; - using ops2_variant_t = see below; - - struct state-type { - fn_t fn; // exposition only - env_t env; // exposition only - args_variant_t args; // exposition only - ops2_variant_t ops2; // exposition only - }; - return state-type{allocator-aware-forward(std::forward_like(fn), rcvr), - let-env(child, get_env(rcvr)), {}, {}}; -} -[ Editor's note: leave paragraphs 9-13 unchanged ] - -Let sndr and env be subexpressions, and let Sndr be decltype((sndr)). If sender-for> is false, then the expression let-cpo.transform_env(sndr, env) is ill-formed. Otherwise, it is equal to: - -auto& [_, _, child] = sndr; -return JOIN-ENV(let-env(child), FWD-ENV(env)); -Let the subexpression out_sndr denote … as before … -[ Editor's note: Change 33.9.12.11 [exec.bulk] paragraph 3 and 4 as follows: ] - -Otherwise, the expression bulk-algo(sndr, policy, shape, f) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), make-sender( - bulk-algo, product-type{policy, shape, f}, sndr)) -except that sndr is evaluated only once. - -The first template argument of product-type is Policy if Policy models copy_constructible, and const Policy& otherwise. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)). If sender-for is false, then the expression bulk.transform_sender(set_value, sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto [_, data, child] = sndr; -auto& [policy, shape, f] = data; -auto new_f = [func = std::move(f)](Shape begin, Shape end, auto&&... 
vs) - noexcept(noexcept(f(begin, vs...))) { - while (begin != end) func(begin++, vs...); -} -return bulk_chunked(std::move(child), policy, shape, std::move(new_f)); -[ Note: This causes the bulk(sndr, policy, shape, f) sender to be expressed in terms of bulk_chunked(sndr, policy, shape, f) when it is connected to a receiver whose execution domain does not customize bulk. — end note ] - -[ Editor's note: Change 33.9.12.12 [exec.when.all] paragraphs 2 and 3 as follows: ] - -The names when_all and when_all_with_variant denote customization point objects. Let sndrs be a pack of subexpressions, and let Sndrs be a pack of the types decltype((sndrs))... , and let CD be the type common_type_t. Let CD2 be CD if CD is well-formed, and default_domain otherwise. The expressions when_all(sndrs...) and when_all_with_variant(sndrs...) are ill-formed if any of the following is true: - -(2.1)sizeof...(sndrs) is 0, or - -(2.2)(sender && ...) is false. - -The expression when_all(sndrs...) is expression-equivalent to: - -transform_sender(CD2(), make-sender(when_all, {}, sndrs...)) -[ Editor's note: Change 33.9.12.12 [exec.when.all] paragraphs 9 and 10 as follows: ] - -template - static consteval void check-types(); -Let Is be the pack of integral template arguments of the integer_sequence specialization denoted by indices-for. - -Effects: Equivalent to: … as before … - -Throws: Any exception thrown as a result of evaluating the Effects, or an exception of an unspecified type derived from exception when CD is ill-formed. -The member impls-for​::​get-attrs is initialized with a callable object equivalent to the following lambda expression: - -[](auto&&, auto&&... child) noexcept { - if constexpr (same_as) { - return env<>(); - } else { - return MAKE-ENV(get_domain, CD()); - } - return see below; -} -Let WHEN-ALL-DOMAIN(Tag, env...) be the expression COMMON-DOMAIN(COMPL-DOMAIN(Tag, child, env...)...). 
The lambda expression above returns an object attrs such that
-
-(10.1)attrs.query(get_completion_domain, env...) is expression-equivalent to WHEN-ALL-DOMAIN(set_value_t, env...).
-
-(10.2)attrs.query(get_completion_domain, env...) is expression-equivalent to:
-
-COMMON-DOMAIN(WHEN-ALL-DOMAIN(set_value_t, env...),
-               WHEN-ALL-DOMAIN(set_error_t, env...),
-               WHEN-ALL-DOMAIN(set_stopped_t, env...))
-(10.3)attrs.query(get_completion_domain, env...) is expression-equivalent to: [ Editor's note: This assumes the adoption of P3887R0?, which was forwarded to LWG at the Kona 2025 meeting. ]
-
-COMMON-DOMAIN(WHEN-ALL-DOMAIN(set_value_t, env...),
-               WHEN-ALL-DOMAIN(set_stopped_t, env...))
-[ Editor's note: Change 33.9.12.12 [exec.when.all] paragraphs 19 and 20 as follows: ]
-
-The expression when_all_with_variant(sndrs...) is expression-equivalent to:
-
-transform_sender(CD2(), make-sender(when_all_with_variant, {}, sndrs...));
-Given subexpressions sndr and env, if sender-for is false, then the expression when_all_with_variant.transform_sender(set_value, sndr, env) is ill-formed; otherwise, it is equivalent to:
-
-auto&& [_, _, ...child] = sndr;
-return when_all(into_variant(std::forward_like(child))...);
-[ Note: This causes the when_all_with_variant(sndrs...) sender to become when_all(into_variant(sndrs)...) when it is connected with a receiver whose execution domain does not customize when_all_with_variant. — end note ]
-
-[ Editor's note: Change 33.9.12.13 [exec.into.variant] paragraph 3 as follows: ]
-
-Otherwise, the expression into_variant(sndr) is expression-equivalent to:
-
-transform_sender(get-domain-early(sndr),
-                make-sender(into_variant, {}, sndr));
-[ Editor's note: Change 33.9.12.14 [exec.stopped.opt] paragraphs 2-4 as follows: ]
-
-The name stopped_as_optional denotes a pipeable sender adaptor object. For a subexpression sndr, let Sndr be decltype((sndr)). 
The expression stopped_as_optional(sndr) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), - make-sender(stopped_as_optional, {}, sndr)) -except that sndr is only evaluated once. - -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for stopped_as_optional_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = - [](const auto&, const auto& child) noexcept -> decltype(auto) { - see below - }; - - template - static consteval void check-types() { - … as before … - } - }; -} -where unspecified-exception is a type derived from exception. - -For subexpressions ign and child, pack of subexpressions as, and query object q, impls-for::get-attrs(ign, child) returns an object o such that o.query(q, as...) is expression-equivalent to: - -(4.1)D() if q is get_completion_domain, where D is the type of the expression - -COMMON-DOMAIN(COMPL-DOMAIN(set_value_t, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_stopped_t, child, FWD-ENV(as)...)) -(4.2)FWD-ENV(get_env(child)).query(q, as...) if q is not get_completion_domain, get_completion_scheduler, or get_completion_scheduler. - -(4.3)Otherwise, o.query(q, as...) is ill-formed. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). If sender-for is false then the expression stopped_as_optional.transform_sender(set_value, sndr, env) is ill-formed; otherwise, if sender_in, FWD-ENV-T(Env)> is false, the expression stopped_as_optional.transform_sender(set_value, sndr, env) is equivalent to not-a-sender(); otherwise, it is equivalent to: - -auto&& [_, _, child] = sndr; -using V = single-sender-value-type, FWD-ENV-T(Env)>; -return let_stopped( - then(std::forward_like(child), - [](Ts&&... 
ts) noexcept(is_nothrow_constructible_v) { - return optional(in_place, std::forward(ts)...); - }), - []() noexcept { return just(optional()); }); -[ Editor's note: Change 33.9.12.15 [exec.stopped.err] paragraphs 2-3 as follows: ] - -The name stopped_as_error denotes a pipeable sender adaptor object. For some subexpressions sndr and err, let Sndr be decltype((sndr)) and let Err be decltype((err)). If the type Sndr does not satisfy sender or if the type Err does not satisfy movable-value, stopped_as_error(sndr, err) is ill-formed. Otherwise, the expression stopped_as_error(sndr, err) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), make-sender(stopped_as_error, err, sndr)) -except that sndr is only evaluated once. - -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for stopped_as_error_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = - [](const auto&, const auto& child) noexcept -> decltype(auto) { - see below - }; - }; -} -For subexpressions ign and child, pack of subexpressions as, and query object q, impls-for::get-attrs(ign, child) returns an object o such that o.query(q, as...) is expression-equivalent to: - -(4.1)D() if q is get_completion_domain, where D is the type of the expression - -COMMON-DOMAIN(COMPL-DOMAIN(set_error_t, child, FWD-ENV(as)...), - COMPL-DOMAIN(set_stopped_t, child, FWD-ENV(as)...)) -(4.2)FWD-ENV(get_env(child)).query(q, as...) if q is not get_completion_domain, get_completion_scheduler, or get_completion_scheduler. - -(4.3)Otherwise, o.query(q, as...) is ill-formed. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). 
If sender-for is false, then the expression stopped_as_error.transform_sender(set_value, sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, err, child] = sndr; -using E = decltype(auto(err)); -return let_stopped( - std::forward_like(child), - [err = std::forward_like(err)]() mutable noexcept(is_nothrow_move_constructible_v) { - return just_error(std::move(err)); - }); -[ Editor's note: Change 33.9.12.16 [exec.associate] paragraph 10 as follows: ] - -The name associate denotes a pipeable sender adaptor object. For subexpressions sndr and token: - -(10.1)If decltype((sndr)) does not satisfy sender, or remove_cvref_t does not satisfy scope_token, then associate(sndr, token) is ill-formed. - -(10.2)Otherwise, the expression associate(sndr, token) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), - make-sender(associate, associate-data(token, sndr))) -except that sndr is evaluated only once. - -[ Editor's note: Change 33.9.13.1 [exec.sync.wait] paragraphs 4-9 as follows: ] - -The name this_thread​::​sync_wait denotes a customization point object. For a subexpression sndr, let Sndr be decltype((sndr)). The expression this_thread​::​sync_wait(sndr) is expression-equivalent to the following, except that sndr is evaluated only once: - -run_loop loop; -sync-wait-env env{&loop}; -auto domain = get_completion_domain(get_env(sndr), env); -return apply_sender(get-domain-early(sndr)domain, sync_wait, sndr, env); -Mandates: - -(4.1)sender_in is true. - -(4.2)The type sync-wait-result-type is well-formed. - -(4.3)same_as> is true, where e is the apply_sender expression above. - -Let sync-wait-state and sync-wait-receiver be the following exposition-only class templates: [ Editor's note: Note the addition of & to the declaration of the sync-wait_state::loop data member. 
] - -namespace std::this_thread { - template - struct sync-wait-state { // exposition only - execution::run_loop& loop; // exposition only - exception_ptr error; // exposition only - sync-wait-result-type result; // exposition only - }; - - template - struct sync-wait-receiver { // exposition only - using receiver_concept = execution::receiver_t; - sync-wait-state* state; // exposition only - - template - void set_value(Args&&... args) && noexcept; - - template - void set_error(Error&& err) && noexcept; - - void set_stopped() && noexcept; - - sync-wait-env get_env() const noexcept { return {&state->loop}; } - }; -} -[ Editor's note: Leave paragraphs 6-8 as-is ] - -For a subexpressions sndr and env, let Sndr be decltype((sndr)). If sender_to> is false or if the type of env is not sync-wait-env, the expression sync_wait.apply_sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -sync-wait-state state{*env.loop}; -auto op = connect(sndr, sync-wait-receiver{&state}); -start(op); - -state.loop.run(); -if (state.error) { - rethrow_exception(std::move(state.error)); -} -return std::move(state.result); -[ Editor's note: Change 33.9.13.2 [exec.sync.wait.var] as follows: ] - -The name this_thread​::​sync_wait_with_variant denotes a customization point object. For a subexpression sndr, let Sndr be decltype(into_variant(sndr)). The expression this_thread​::​sync_wait_with_variant(sndr) is expression-equivalent to the following, except sndr is evaluated only once: - -run_loop loop; -sync-wait-env env{&loop}; -auto domain = get_completion_domain(get_env(sndr), env); -return apply_sender(get-domain-early(sndr)domain, sync_wait_with_variant, sndr, env); -Mandates: - -(1.1)sender_in is true. - -(1.2)The type sync-wait-with-variant-result-type is well-formed. - -(1.3)same_as> is true, where e is the apply_sender expression above. - -For subexpressions sndr and env, let Sndr be decltype(into_variant(sndr)). 
If sender_to> is false or if the type of env is not sync-wait-env, Ttthe expression sync_wait_with_variant.apply_sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -using result_type = sync-wait-with-variant-result-type; -if (auto opt_value = sync_wait.apply_sender(into_variant(sndr), env)) { - return result_type(std::move(get<0>(*opt_value))); -} -return result_type(nullopt); -The behavior of this_thread​::​sync_wait_with_variant(sndr) is undefined unless … as before … - -[ Editor's note: Change 33.11.1 [exec.prop] as follows: ] - -namespace std::execution { - template - struct prop { - QueryTag query_; // exposition only - ValueType value_; // exposition only - - constexpr const ValueType& query(QueryTag, auto&&...) const noexcept { - return value_; - } - }; - - template - prop(QueryTag, ValueType) -> prop>; -} -… as before … - -[ Editor's note: Change 33.11.2 [exec.env] as follows: ] - -namespace std::execution { - template - struct env { - Envs0 envs0; // exposition only - Envs1 envs1; // exposition only - ⋮ - Envsn-1 envsn-1; // exposition only - - template - constexpr decltype(auto) query(QueryTag q, Args&&... args) const noexcept(see below); - }; - - template - env(Envs...) -> env...>; -} -The class template env is used to construct a queryable object from several queryable objects. Query invocations on the resulting object are resolved by attempting to query each subobject in lexical order. -… as before … - -template -constexpr decltype(auto) query(QueryTag q, Args&&... args) const noexcept(see below); -Let has-query be the following exposition-only concept: - -template - concept has-query = // exposition only - requires (const Env& env, Args&&... args) { - env.query(QueryTag(), std::forward(args)...); - }; -Let fe be the first element of envs0, envs1, … envsn-1 such that the expression fe.query(q, std::forward(args)...) is well-formed. - -Constraints: (has-query || ...) is true. 
- -Effects: Equivalent to: return fe.query(q, std::forward(args)...); - -Remarks: The expression in the noexcept clause is equivalent to noexcept(fe.query(q, std::forward(args)...)). - -[ Editor's note: In 33.12.1.2 [exec.run.loop.types], add a new paragraph after paragraph 4 as follows: ] - -Let sch be an expression of type run-loop-scheduler. The expression schedule(sch) has type run-loop-sender and is not potentially-throwing if sch is not potentially-throwing. -For type set-tag other than set_error_t, the expression get_completion_scheduler(get_env(schedule(sch))) == sch evaluates to true. -[ Editor's note: Change 33.13.3 [exec.affine.on] paragraphs 3 and 4 as follows: ] - -Otherwise, the expression affine_on(sndr, sch) is expression-equivalent to: make-sender(affine_on, sch, sndr). - -transform_sender(get-domain-early(sndr), make-sender(affine_on, sch, sndr)) -except that sndr is evaluated only once. - -The exposition-only class template impls-for (33.9.2 [exec.snd.expos]) is specialized for affine_on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = - [](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); - return SCHED-ATTRS(data, child); - }; - }; -} -[ Editor's note: Change 33.13.4 [exec.inline.scheduler] paragraphs 1-3 as follows: ] - -inline_scheduler is a class that models scheduler (33.6 [exec.sched]). All objects of type inline_scheduler are equal. For a subexpression sch of type inline_scheduler, a query object q, and a pack of subexpressions as, the expression sch.query(q, as...) is expression-equivalent to inline-attrs().query(q, as...). - -inline-sender is an exposition-only type that satisfies sender. The type completion_signatures_of_t is completion_signatures. 
- -Let sndr be an expression of type inline-sender, let rcvr be an expression such that receiver_of is true where CS is completion_signatures, then: - -(3.1)the expression connect(sndr, rcvr) has type inline-state> and is potentially-throwing if and only if ((void)sndr, auto(rcvr)) is potentially-throwing, and -(3.2)the expression get_completion_scheduler(get_env(sndr)) has type​ inline_scheduler and is potentially-throwing if and only if get_env(sndr) is potentially-throwing. -[ Editor's note: For reference: cplusplus/sender-receiver#349 ] - -5 Removing algorithm customization -This section discusses the implications of removing algorithm customizability. - -Removing algorithm customization is fairly straightforward in most regards. The sender algorithm customization point objects would directly return the result of calling make-sender instead of passing it to transform_sender. - -However, there are a few parts of std::execution that need special care when making the algorithms non-customizable. - -5.1 The parallel scheduler -The parallel_scheduler goes to great lengths to ensure that the bulk family of algorithms – bulk, bulk_chunked, and bulk_unchunked – are executed in parallel when the user requests it and when the underlying execution context supports it. - -To that end, the parallel_scheduler “provides a customized implementation” of the bulk_chunked and bulk_unchunked algorithms, but nothing is said about how those custom implementations are found or under what circumstances users can be assured that the parallel_scheduler will use them. Arguably, this is under-specified in the current Working Draft and should be addressed whether this paper is accepted or not. - -We have to give users a guarantee that if X, Y, and Z conditions are met, bulk[_[un]chunked] will be run in parallel with absolute certainty. 
- -One solution is to say that the bulk algorithms are guaranteed to execute in parallel when the immediate predecessor of the bulk operation is known to complete on the parallel_scheduler. In a sender expression such as the following: - -namespace ex = std::execution; -sndr | ex::bulk(ex::par, 1024, fn) -If sndr’s attributes advertizes a completion scheduler of type parallel_scheduler, then we can guarantee that the bulk operation will execute in parallel. Implementations can choose to parallelize bulk under other circumstances, but we require this one. - -The implication of offering this guarantee is that we must preserve the guarantee going forward. Any new customization mechanism we might add must never result in parallel execution becoming serialized. - -The reverse is not necessarily true though. I maintain that a future change that parallelizes a bulk algorithm that formerly executed serially on the parallel_scheduler is an acceptable change of behavior. - -If SG1 or LEWG disagrees, there are ways to avoid even this behavior change. - -5.2 The task scheduler -Library issue #4336 describes the poor interaction between task_scheduler, a type-erased scheduler, and the bulk family of algorithms; namely, that the task_scheduler always executes bulk in serial, even when it is wrapping a parallel_scheduler. - -This is not a problem caused by the customization mechanism, but it is something that can be addressed as part of the customization removal process. - -When we address that issue, we must avoid the parallel_scheduler pitfall by under-specifying the interaction with bulk. As with parallel_scheduler, users must have a guarantee about the conditions under which bulk is accelerated on a task_scheduler. - -Fortunately, the parallel_scheduler has already given us a way to punch the bulk_chunked and bulk_unchunked algorithms through a type-erased API boundary: parallel_scheduler_backend (33.16.4 [exec.sysctxrepl.psb]). 
By specifying the behavior of task_scheduler in terms of parallel_scheduler_backend and bulk_item_receiver_proxy, we can give task_scheduler the ability to parallelize bulk without having to invent a new mechanism. - -5.3 The bulk algorithms -Few users will ever have a need to customize an algorithm like then or let_value. The bulk algorithms are a different story. Anybody with a custom thread pool will benefit from a custom bulk implementation that can run in parallel on the thread pool. The loss of algorithm customization is particularly painful in this area. This section explores some options to address these concerns and makes a recommendation. - -5.3.1 Option 1: Remove bulk, bulk_chunked, and bulk_unchunked -This option cuts the Gordian knot, but comes at a high cost. The parallel_scheduler can hardly be called “parallel” if it does not offer a way to execute work in parallel, so cutting the bulk algorithms probably means cutting parallel_scheduler also. - -5.3.2 Option 2: Magical parallel execution -In this option, we keep the bulk algorithms and the parallel_scheduler, and we say that the bulk algorithms are executed in parallel on the parallel_scheduler (and on a task_scheduler that wraps a parallel_scheduler), but we leave the mechanism unspecified. - -This option is essentially the status quo, except that as discussed in The parallel scheduler, this aspect of the parallel_scheduler is currently under-specified. The referenced section proposes a path forward. - -A variant of this option is to specify an exposition-only mechanism whereby bulk gets parallelized. - -This option makes parallel_scheduler and task_scheduler “magic” with respect to the bulk algorithms. End users would have no standard mechanism to parallelize bulk on their own third-party thread pools in C++26. - -This is the approach taken by the Proposed wording, option 2: Remove algorithm customization below. 
- -5.3.3 Option 3: A normative mechanism for the bulk* algorithms only -In this option, we reintroduce algorithm customization with a special-purpose API just for the bulk algorithms. For example, a scheduler might have an optional sch.bulk_transform(sndr, env) that turns a serial bulk* sender into one that executes in parallel on scheduler sch. Whenever a bulk* sender is passed to connect, connect can check the sender’s predecessor for a completion scheduler that defines bulk_transform and uses it if found. - -The downside of this approach is that we will still have to support this API even when a more general algorithm customization mechanism is available. - -5.4 Impacts on hardware vendors -Without algorithm customization, manufacturers of special-purpose hardware accelerators will not be able to ship a scheduler that both: - -works with any standard-conforming implementation of std::execution, and - -performs optimally on their hardware for all of the standard algorithms. - -See Mitigating factors for some reasons why this is not as terrible as it could be. - -5.5 Mitigating factors -The loss of direct support for sender algorithm customization is a blow to power users of std::execution, but there are a few factors that mitigate the blow. - -5.5.1 Sender introspection -All of the senders returned from the standard algorithms are self-describing and can be unpacked into their constituent parts with structured bindings. A sufficiently motivated user can “customize” an algorithm by writing a recursive sender tree transformation, explicitly transforming senders before launching them. - -5.5.2 Third party algorithms -The sender concepts and customization points make it possible for users to write their own sender algorithms that interoperate with the standard ones. If a user wants to change the behavior of the then algorithm in some way, they have the option of writing their own and using it instead. 
I expect libraries of third-party algorithms to appear on GitHub in time, as they tend to. - -5.5.3 Difficulties with proprietary extensions -Some execution contexts place extra-standard requirements on the code that executes on them. For example, NVIDIA GPUs require device-accelerated code to be annotated with its proprietary __device__ annotation. Standard libraries are unlikely to ship implementations of std::execution with such annotations. The consequence is that, rather than shipping just a GPU scheduler with some algorithm customizations, a vendor like NVIDIA is already committed to shipping its own complete implementation of std::execution (in a different namespace, of course). - -For such vendors, the inability to customize standard algorithms is a moot point. Since it is implementing the standard algorithms, they can make the implementations do whatever they want. - -5.6 The removal process -5.6.1 Approach -The approach to removing sender algorithm customization is twofold: - -Remove those components that facilitate algorithm customization and their uses where it is easy to do so. - -In all other cases, turn normative mechanisms into non-normative ones so we can change them later. This results in smaller and safer wording changes and preserves the already agreed-upon semantics in a way that is easy to verify. - -5.6.2 Procedure -The steps for removing algorithm customization are detailed below. - -Remove the type default_domain (33.9.5 [exec.domain.default]). - -Remove the functions: - -transform_sender (33.9.6 [exec.snd.transform]), -transform_env (33.9.7 [exec.snd.transform.env]), and -apply_sender (33.9.8 [exec.snd.apply]). -Remove the query object get_domain (33.5.5 [exec.get.domain]). - -Remove the exposition-only helpers: - -completion-domain (33.9.2 [exec.snd.expos]/8-9), -get-domain-early (33.9.2 [exec.snd.expos]/13), and -get-domain-late (33.9.2 [exec.snd.expos]/14). 
-Change the functions get_completion_signatures (33.9.9 [exec.getcomplsigs]) and connect (33.9.10 [exec.connect]) to operate on a sender determined as follows instead of passing the sender through transform_sender: - -If the sender has a tag with an exposition-only transform-sender member function, pass the sender to this function with the receiver’s environment and then call connect on the resulting sender. This preserves the behavior of calling transform_sender with the default_domain. - -Otherwise, call the connect member function of the passed-in sender. - -For the following algorithms that are currently expressed in terms of a sender transformation to a lowered form, move the lowering from alg.transform_sender(sndr, env) to alg.transform-sender(sndr, env). - -starts_on (33.9.12.5 [exec.starts.on]), -continues_on (33.9.12.6 [exec.continues.on]), -on (33.9.12.8 [exec.on]), -bulk (33.9.12.11 [exec.bulk]), -when_all_with_variant (33.9.12.12 [exec.when.all]), -stopped_as_optional (33.9.12.14 [exec.stopped.opt]), and -stopped_as_error (33.9.12.15 [exec.stopped.err]). -For each sender adaptor algorithm in 33.9.12 [exec.adapt] that is specified to be expression-equivalent to some transform_sender invocation of the form: - -transform_sender(some-computed-domain(), make-sender(tag, {args...}, sndr)); -Change the expression to: - -make-sender(tag, {args...}, sndr); -For example, in 33.9.12.6 [exec.continues.on]/3, the following: - -transform_sender(get-domain-early(sndr), make-sender(continues_on, sch, sndr)) -would be changed to: - -make-sender(continues_on, sch, sndr) -Additionally, if there is some caveat of the form “except that sndr is evaluated only once,” that caveat should be removed as appropriate. - -Merge the schedule_from (33.9.12.7 [exec.schedule.from]) and continues_on (33.9.12.6 [exec.continues.on]) algorithms into one algorithm called continues_on. 
(Currently they are separate so that they can be customized independently; by default continues_on merely dispatches to schedule_from.) - -Change 33.9.13.1 [exec.sync.wait] and 33.9.13.2 [exec.sync.wait.var] to dispatch directly to their default implementations instead of computing a domain and using apply_sender to dispatch to an implementation. - -Fix a bug in the on(sndr, sch, closure) algorithm where a write_env is incorrectly changing the “current” scheduler before its child continues_on actually transfers to that scheduler. continues_on needs to know the scheduler on which it will be started in order to find customizations correctly in the future. - -Tweak the wording of parallel_scheduler (33.15 [exec.par.scheduler]) to indicate that it (parallel_scheduler) is permitted to run the bulk family of algorithms in parallel in accordance with those algorithms’ semantics, rather than suggesting that those algorithms are “customized” for parallel_scheduler. The mechanism for such remains non-normative, however we specify the conditions under which the parallel_scheduler is guaranteed to run the bulk algorithms in parallel. (This is currently under-specified.) - -Respecify task_scheduler in terms of parallel_scheduler_backend so that the bulk algorithms can be accelerated despite task_scheduler’s type-erasure. This addresses LWG#4336. As with parallel_scheduler, we specify the conditions under which task_scheduler is guaranteed to run the bulk algorithms in parallel. - -From the scheduler concept, remove the required expression: - -{ auto(get_completion_scheduler(get_env(schedule(std::forward(sch))))) } - -> same_as>; -Instead, add a semantic requirement that if the above expression is well-formed, then it shall compare equal to sch. Additionally, require that that expression is well-formed for the parallel_scheduler, the task_scheduler, and run_loop’s scheduler, but not inline_scheduler. 
See inline_scheduler improvements for the motivation behind these changes, but in short: the inline_scheduler will not know where it completes in C++26, but it will in C++29 when we add back algorithm customization. - -Optional, but recommended: Change the env<>::query member function to accept optional additional arguments after the query tag. This restores the original design of env to that which was first proposed in [P3325R1] and which was approved by LEWG straw poll in St Louis. As described in Restoring algorithm customization in C++29, when asking a sender for its completion scheduler, the caller needs to pass extra information about where the operation will be started, and that will require env<>::query to accept extra arguments. - -5.7 Restoring algorithm customization in C++29 -For C++29, we want the sender algorithms in std::execution to be customizable, with different implementations suited for different execution contexts. If we remove customization for C++26, how do we add it back without breaking code? - -Recall that many senders do not know where they will complete until they know where they will be started, and that information is not currently provided when the sender is queried for its completion scheduler. This is the shoal on which algorithm customization has foundered, because without accurate information about where operations are executing, it is impossible to pick the right algorithm implementation. - -Once the problem is stated plainly, the fix (or at least a major part of it) is obvious: - -When asking the sender where it will complete, tell it where it will start. - -The implication of this is that so-called “early” customization, performed when constructing a sender, will not be coming back. The receiver’s execution environment is not known when constructing a sender. C++29 will bring back “late” customization only. 
- -5.7.1 Completion scheduler enhancements -A paper targetting C++29 would propose that we extend the get_completion_scheduler query to support an optional environment argument. Given a sender S and receiver R, the query would look like: - -// Pass the sender's attributes and the receiver's environment when computing -// the completion scheduler: -auto sch = get_completion_scheduler(get_env(S), get_env(R)); -It will not be possible in C++26 to pass the receiver’s environment in this way, making this a conforming extension since it would not change the meaning of any existing code. - -This change will also make it possible to provide a completion scheduler for the error channel in more cases. That is often not possible today since many errors are reported inline on the context on which the operation is started. The receiver’s environment knows where the operation will be started, so by passing it to the get_completion_scheduler query, the error completion scheduler is knowable. - -NoteThe paragraph above makes it sound like this would be changing the behavior for the get_completion_scheduler(get_env(sndr)) query. But that expression will behave as it always has. Only when called with the receiver’s environment will any new behavior manifest; hence, this change is a pure extension. - -By the way, this extension to get_completion_scheduler motivates the change to env<>::query described above in The removal process. Although we could decide to defer that change until it is needed in C++29, it seems best to me to make the change now. - -5.7.2 Domains -There are sender expressions that complete on an indeterminate scheduler based on runtime factors; when_all is a good example. This is the problem the get_domain query solved. So long as all of when_all’s child senders share a common domain tag – a property of the scheduler – we know the domain on which the when_all operation will complete, even though we do not know which scheduler it will complete on. 
The domain controls algorithm selection, not the scheduler directly. - -So the plan will be to bring back a get_domain query in C++29. Additionally, just as it is necessary to have three get_completion_scheduler queries, one each for the three different completion channels, it is necessary to have three get_completion_domain queries for the times when the completion scheduler is indeterminate but the domain is known. - -NoteAbove we say, “So long as all of when_all’s child senders share a common domain tag […]”. This sounds like we are adding a new requirement to the when_all algorithm. However, this requirement will be met for all existing uses of when_all. Before C++29, all senders will be in the “default” domain, so they trivially all share a common domain. - -Giving a non-default domain to a scheduler is the way to opt-in to algorithm customization. Prior to C++29, there would be no get_*domain queries, hence the addition of those queries in C++29 would not affect any existing schedulers. And the domain queries would be so-called “forwarding” queries, meaning they will automatically be passed through layers of sender adaptors. Users will not have to change their code in order for domain information to be propagated. As a result, this change is a pure extension. - -5.7.3 Customizing connect -Since C++29 would support only late (connect-time) customization, customizing an algorithm effectively amounts to customizing that algorithm’s connect operation. By default, connect(sndr, rcvr) calls sndr.connect(rcvr), but in C++29 there will be a way to do something different depending on the sender’s attributes and the receiver’s environment. 
- -As described in Algorithm dispatching in connect, connect will compute two domains, the “starting” domain and the (value) “completion” domain: - -Domain kind -Query -Starting domain get_domain(get_env(rcvr)) -Completion domain get_completion_domain(get_env(sndr), get_env(rcvr)) -The connect CPO would use these two domains to optionally apply “starting” and “completing” transformations to the sender prior to calling .connect(rcvr) on it. This will be a conforming extension since no schedulers in C++26 will have a custom domain tag, and hence no sender transformations. - -5.7.4 The parallel and task schedulers and bulk -Once we have a general mechanism for customizing algorithms, we can consider changing parallel_scheduler and task_scheduler to use that mechanism to find parallel implementations of the bulk algorithms. In C++26, it is unspecified precisely how those schedulers accelerate bulk, and we can certainly leave it that way for C++29. No change is often the safest change. - -If we wanted to switch to using the new algorithm dispatch mechanics in C++29, I believe we can do so with minimal impact on existing code. Any behavior change would be an improvement, accelerating bulk operations that should have been accelerated but were not. - -Consider the following sender: - -starts_on(parallel_scheduler(), just() | bulk(fn)) -In C++26, we can offer no iron-clad standard guarantee that this bulk operation will be accelerated even though it is executing on the parallel scheduler. The predecessor of bulk, just(), would not know where it will complete in C++26. There is no plumbing yet to tell it that it will be started on the parallel scheduler. As a result, it is QoI whether this bulk will execute in parallel or not. - -But suppose we add a get_completion_domain query to the parallel_scheduler such that the query returns an instance of a new type: parallel_domain. 
Now, when connecting the bulk sender, connect will ask for the predecessor’s domain, passing also the receiver’s environment. Now the just() sender is able to say where it completes: the domain where it starts, get_domain(get_env(rcvr)). This will return parallel_domain{}. connect would then use that information to find a parallel implementation of bulk. - -As a result, in C++29 we could guarantee that this usage of bulk will be parallelized. For some stdlib implementations, this would be a behavior change: what once executed serially on a thread of the parallel scheduler now executes in parallel on many threads. Can that break working code? Yes, but only code that had already violated the preconditions of bulk: that fn can safely be called in parallel. - -I do not believe this should be considered a breaking change, since any code that breaks is already broken. - -All of the above is true also for task_scheduler, which merely adds an indirection to the call to connect. After the changes suggested by the “Remove” option, the task_scheduler accelerates bulk in the same way as parallel_scheduler. - -NoteIf we assign parallel_domain to the parallel_scheduler, and we also add a requirement to when_all that all of its child operations share a common domain (see Domains), does that have the potential to break existing code? It would not. We would make parallel_domain inherit from default_domain so that when_all will compute the common domain as default_domain even if one child completes in the parallel_domain. 
- -5.8 Proposed wording, option 2: Remove algorithm customization -[ Editor's note: In 33.4 [execution.syn], make the following changes: ] - -… as before … - -namespace std::execution { - // [exec.queries], queries - struct get_domain_t { unspecified }; - struct get_scheduler_t { unspecified }; - struct get_delegation_scheduler_t { unspecified }; - struct get_forward_progress_guarantee_t { unspecified }; - template - struct get_completion_scheduler_t { unspecified }; - struct get_await_completion_adaptor_t { unspecified }; - - inline constexpr get_domain_t get_domain{}; - inline constexpr get_scheduler_t get_scheduler{}; - inline constexpr get_delegation_scheduler_t get_delegation_scheduler{}; - enum class forward_progress_guarantee; - inline constexpr get_forward_progress_guarantee_t get_forward_progress_guarantee{}; - template - constexpr get_completion_scheduler_t get_completion_scheduler{}; - inline constexpr get_await_completion_adaptor_t get_await_completion_adaptor{}; - -… as before … - - // [exec.env], class template env - template - struct env; - - // [exec.domain.default], execution domains - struct default_domain; - - // [exec.sched], schedulers - struct scheduler_t {}; - -… as before … - - template - using tag_of_t = see below; - - // [exec.snd.transform], sender transformations - template - requires (sizeof...(Env) <= 1) - constexpr sender decltype(auto) transform_sender( - Domain dom, Sndr&& sndr, const Env&... env) noexcept(see below); - - // [exec.snd.transform.env], environment transformations - template - constexpr queryable decltype(auto) transform_env( - Domain dom, Sndr&& sndr, Env&& env) noexcept; - - // [exec.snd.apply], sender algorithm application - template - constexpr decltype(auto) apply_sender( - Domain dom, Tag, Sndr&& sndr, Args&&... 
args) noexcept(see below); - - // [exec.connect], the connect sender algorithm - struct connect_t; - inline constexpr connect_t connect{}; - -… as before … -[ Editor's note: Remove subsection 33.5.5 [exec.get.domain]. ] - -[ Editor's note: In 33.6 [exec.sched], change paragraphs 1 and 5 and strike paragraph 6 as follows: ] - -The scheduler concept defines the requirements of a scheduler type (33.3 [exec.async.ops]). schedule is a customization point object that accepts a scheduler. A valid invocation of schedule is a schedule-expression. - -namespace std::execution { - template - concept scheduler = - derived_from::scheduler_concept, scheduler_t> && - queryable && - requires(Sch&& sch) { - { schedule(std::forward(sch)) } -> sender; - { auto(get_completion_scheduler( - get_env(schedule(std::forward(sch))))) } - -> same_as>; - } && - equality_comparable> && - copyable>; -} -… as before … - -For a given scheduler expression sch, if the expression auto(get_completion_scheduler(get_env(schedule(sch)))) is well-formed, it shall have type remove_cvref_t and shall compare equal to sch. -For a given scheduler expression sch, if the expression get_domain(sch) is well-formed, then the expression get_domain(get_env(schedule(sch))) is also well-formed and has the same type. -[ Editor's note: In 33.9.1 [exec.snd.general], change paragraph 1 as follows: ] - -Subclauses 33.9.11 [exec.factories] and 33.9.12 [exec.adapt] define customizable algorithms that return senders. Each algorithm has a default implementation. Let sndr be the result of an invocation of such an algorithm or an object equal to the result (18.2 [concepts.equality]), and let Sndr be decltype((sndr)). Let rcvr be a receiver of type Rcvr with associated environment env of type Env such that sender_to is true. 
For the default implementation of the algorithm that produced sndr, cConnecting sndr to rcvr and starting the resulting operation state (33.3 [exec.async.ops]) necessarily results in the potential evaluation (6.3 [basic.def.odr]) of a set of completion operations whose first argument is a subexpression equal to rcvr. Let Sigs be a pack of completion signatures corresponding to this set of completion operations, and let CS be the type of the expression get_completion_signatures(). Then CS is a specialization of the class template completion_signatures (33.10 [exec.cmplsig]), the set of whose template arguments is Sigs. If none of the types in Sigs are dependent on the type Env, then the expression get_completion_signatures() is well-formed and its type is CS. If a user-provided implementation of the algorithm that produced sndr is selected instead of the default: - -(1.1)Any completion signature that is in the set of types denoted by completion_signatures_of_t and that is not part of Sigs shall correspond to error or stopped completion operations, unless otherwise specified. - -(1.2)If none of the types in Sigs are dependent on the type Env, then completion_signatures_of_t and completion_signatures_of_t shall denote the same type. - -[ Editor's note: Change 33.9.2 [exec.snd.expos] paragraph 6 as follows: ] - -For a scheduler sch, SCHED-ATTRS(sch) is an expression o1 whose type satisfies queryable such that o1.query(get_completion_scheduler) is an expression with the same type and value as sch equivalent to MAKE-ENV(get_completion_scheduler, sch) where Tag is one of set_value_t or set_stopped_t, and such that o1.query(get_domain) is expression-equivalent to sch.query(get_domain). SCHED-ENV(sch) is an expression o2 whose type satisfies queryable such that o2.query(get_scheduler) is a prvalue with the same type and value as sch, and such that o2.query(get_domain) is expression-equivalent to sch.query(get_domain) equivalent to MAKE-ENV(get_scheduler, sch). 
-[ Editor's note: Remove the prototype of the exposition-only completion-domain function just before 33.9.2 [exec.snd.expos] paragraph 8, and with it remove paragraphs 8 and 9, which specify the function’s behavior. ] - -[ Editor's note: Remove 33.9.2 [exec.snd.expos] paragraphs 13 and 14 and the prototypes for the get-domain-early and get-domain-late functions. ] - -[ Editor's note: Remove subsection 33.9.5 [exec.domain.default]. ] - -[ Editor's note: Remove subsection 33.9.6 [exec.snd.transform]. ] - -[ Editor's note: Remove subsection 33.9.7 [exec.snd.transform.env]. ] - -[ Editor's note: Remove subsection 33.9.8 [exec.snd.apply]. ] - -[ Editor's note: Change 33.9.9 [exec.getcomplsigs] as follows: ] - -Let except be an rvalue subexpression of an unspecified class type Except such that move_constructible && derived_from is true. Let CHECKED-COMPLSIGS(e) be e if e is a core constant expression whose type satisfies valid-completion-signatures; otherwise, it is the following expression: - -(e, throw except, completion_signatures()) -Let get-complsigs() be expression-equivalent to remove_reference_t​::​template get_completion_signatures(). Let NewSndr be Sndr if sizeof...(Env) == 0 is true; otherwise, decltype(s) where s is the following expression: Let NewSndr be decltype(tag_of_t().transform-sender(declval(), declval()...)) if that expression is well-formed, and Sndr otherwise. - -transform_sender( - get-domain-late(declval(), declval()...), - declval(), - declval()...) -Constraints: sizeof...(Env) <= 1 is true. - -Effects: Equivalent to: … as before … - -[ Editor's note: Change 33.9.10 [exec.connect] as follows: ] - -connect connects (33.3 [exec.async.ops]) a sender with a receiver. - -The name connect denotes a customization point object. 
For subexpressions sndr and rcvr, let Sndr be decltype((sndr)) and Rcvr be decltype((rcvr)); let new_sndr be the expression transform_sender(decltype(get-domain-late(sndr, get_env(rcvr))){}, sndr, get_env(rcvr)) tag_of_t().transform-sender(sndr, get_env(rcvr)) if that expression is well-formed, and sndr otherwise; and let DS and DR be decay_t and decay_t, respectively. - -Let connect-awaitable-promise be … as before … - -[ Editor's note: Change 33.9.11.1 [exec.schedule] paragraph 4 as follows: ] - -If the expression - -get_completion_scheduler(get_env(sch.schedule()))== sch -is ill-formed, or is well-formed and evaluates to false, the behavior of calling schedule(sch) is undefined. - -[ Editor's note: From 33.9.12.1 [exec.adapt.general], strike paragraph (3.6) as follows: ] - -Unless otherwise specified: - -… as before … - -(3.5)An adaptor whose child senders are all non-dependent (33.3 [exec.async.ops]) is itself non-dependent. - -(3.6)These requirements apply to any function that is selected by the implementation of the sender adaptor. - -(3.7)Recommended practice: Implementations should use the completion signatures of the adaptors to communicate type errors to users and to propagate any such type errors from child senders. - -[ Editor's note: Change 33.9.12.5 [exec.starts.on] paragraph 3 as follows: ] - -Otherwise, the expression starts_on(sch, sndr) is expression-equivalent to: make-sender(starts_on, sch, sndr). - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(starts_on, sch, sndr)) -except that sch is evaluated only once. - -Let out_sndr and env be subexpressions such that OutSndr is decltype((out_sndr)). 
If sender-for is false, then the expressions starts_on.transform_env(out_sndr, env) and expression starts_on.transform_sendertransform-sender(out_sndr, env) are is ill-formed; otherwise it is equivalent to: - -(4.1)starts_on.transform_env(out_sndr, env) is equivalent to: - -auto&& [_, sch, _] = out_sndr; -return JOIN-ENV(SCHED-ENV(sch), FWD-ENV(env)); -(4.2)starts_on.transform_sender(out_sndr, env) is equivalent to: - -auto&& [_, sch, sndr] = out_sndr; -return let_value( - schedule(sch), - [sndr = std::forward_like(sndr)]() mutable - noexcept(is_nothrow_move_constructible_v>) { - return std::move(sndr); - }); -Let out_sndr be … as before … -[ Editor's note: Remove subsection 33.9.12.6 [exec.continues.on] ] - -[ Editor's note: Change 33.9.12.7 [exec.schedule.from] to [exec.continues.on] and change it as follows: ] - -33.9.12.76 execution::schedule_fromcontinues_on [exec.schedule.from.continues.on] - -schedule_fromcontinues_on schedules work dependent on the completion of a sender onto a scheduler’s associated execution resource. - -[Note 1: schedule_from is not meant to be used in user code; it is used in the implementation of continues_on. — end note] - -The name schedule_fromcontinues_on denotes a customization point object. For some subexpressions sch and sndr, let Sch be decltype((sch)) and Sndr be decltype((sndr)). If Sch does not satisfy scheduler, or Sndr does not satisfy sender, schedule_from(sch, sndr)continues_on(sndr, sch) is ill-formed. - -Otherwise, the expression schedule_from(sch, sndr)continues_on(sndr, sch) is expression-equivalent to: make-sender(continues_on, sch, sndr) - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(schedule_from, sch, sndr)) -except that sch is evaluated only once. 
- -The exposition-only class template impls-for (33.9.1 [exec.snd.general]) is specialized for schedule_from_tcontinues_on_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-state = see below; - static constexpr auto complete = see below; - - template - static consteval void check-types(); - }; -} -The member impls-for​::​get-attrs is initialized with a callable object equivalent to the following lambda: - -[](const auto& data, const auto& child) noexcept -> decltype(auto) { - return JOIN-ENV(SCHED-ATTRS(data), FWD-ENV(get_env(child))); -} -The member impls-for​::​get-state is initialized with a callable object equivalent to the following lambda: - -… as before … - -template - static consteval void check-types(); -… as before … - -The member impls-for​::complete is initialized with a callable object equivalent to the following lambda: - -… as before … - -Let out_sndr be a subexpression denoting a sender returned from schedule_from(sch, sndr)continues_on(sndr, sch) or one equal to such, and let OutSndr be the type decltype((out_sndr)). Let out_rcvr be … as before … - -[ Editor's note: Change 33.9.12.8 [exec.on] paragraphs 3-8 as follows: ] - -Otherwise, if decltype((sndr)) satisfies sender, the expression on(sch, sndr) is expression-equivalent to: make-sender(on, sch, sndr). - -transform_sender( - query-with-default(get_domain, sch, default_domain()), - make-sender(on, sch, sndr)) -except that sch is evaluated only once. - -For subexpressions sndr, sch, and closure, if - -(4.1)decltype((sch)) does not satisfy scheduler, or - -(4.2)decltype((sndr)) does not satisfy sender, or - -(4.3)closure is not a pipeable sender adaptor closure object ([exec.adapt.obj]), the expression on(sndr, sch, closure) is ill-formed; otherwise, it is expression-equivalent to: make-sender(on, product-type{sch, closure}, sndr). 
- -transform_sender( - get-domain-early(sndr), - make-sender(on, product-type{sch, closure}, sndr)) -except that sndr is evaluated only once. - -Let out_sndr and env be subexpressions, let OutSndr be decltype((out_sndr)), and let Env be decltype((env)). If sender-for is false, then the expressions on.transform_env(out_sndr, env) and expression on.transform_sendertransform-sender(out_sndr, env) are is ill-formed. - -Otherwise: Let not-a-scheduler be an unspecified empty class type. - -The expression on.transform_env(out_sndr, env) has effects equivalent to: - -auto&& [_, data, _] = out_sndr; -if constexpr (scheduler) { - return JOIN-ENV(SCHED-ENV(std::forward_like(data)), FWD-ENV(std::forward(env))); -} else { - return std::forward(env); -} -The expression on.transform_sendertransform-sender(out_sndr, env) has effects equivalent to: - -auto&& [_, data, child] = out_sndr; -if constexpr (scheduler) { - auto orig_sch = - query-with-default(get_scheduler, env, not-a-scheduler()); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return continues_on( - starts_on(std::forward_like(data), std::forward_like(child)), - std::move(orig_sch)); - } -} else { - auto& [sch, closure] = data; - auto orig_sch = query-with-default( - get_completion_scheduler, - get_env(child), - query-with-default(get_scheduler, env, not-a-scheduler())); - - if constexpr (same_as) { - return not-a-sender{}; - } else { - return write_env continues_on( - continues_on write_env( - std::forward_like(closure)( - continues_on( - write_env(std::forward_like(child), SCHED-ENV(orig_sch)), - sch)), - orig_sch SCHED-ENV(sch)), - SCHED-ENV(sch) orig_sch); - } -} -[ Editor's note: Change 33.9.12.9 [exec.then] paragraph 3 as follows: ] - -Otherwise, the expression then-cpo(sndr, f) is expression-equivalent to: make-sender(then-cpo, f, sndr). - -transform_sender(get-domain-early(sndr), make-sender(then-cpo, f, sndr)) -except that sndr is evaluated only once. 
- -[ Editor's note: Change 33.9.12.10 [exec.let] paragraphs 2-4 as follows: ] - -For let_value, let_error, and let_stopped, let set-cpo be set_value, set_error, and set_stopped, respectively. Let the expression let-cpo be one of let_value, let_error, or let_stopped. For a subexpression sndr, let let-env(sndr) be expression-equivalent to the first well-formed expression below: - -(2.1)SCHED-ENV(get_completion_scheduler>(get_env(sndr))) -(2.2)MAKE-ENV(get_domain, get_domain(get_env(sndr))) -(2.3)(void(sndr), env<>{}) -The names let_value, let_error, and let_stopped denote … as before … - -Otherwise, the expression let-cpo(sndr, f) is expression-equivalent to: make-sender(let-cpo, f, sndr). - -transform_sender(get-domain-early(sndr), make-sender(let-cpo, f, sndr)) -except that sndr is evaluated only once. - - -[ Editor's note: Change 33.9.12.11 [exec.bulk] paragraphs 3 and 4 and insert paragraphs 5 and 6 as follows: ] - -Otherwise, the expression bulk-algo(sndr, policy, shape, f) is expression-equivalent to: - -transform_sender(get-domain-early(sndr), make-sender( - bulk-algo, product-type{policy, shape, f}, sndr)) -except that sndr is evaluated only once. The first template argument of product-type is Policy if Policy models copy_constructible, and const Policy& otherwise. - -Let sndr and env be subexpressions be an expression such that Sndr is decltype((sndr)). If sender-for is false, then the expression bulk.transform_sender(sndr, env) as-bulk-chunked(sndr) is ill-formed; otherwise, it is equivalent to: - -auto [_, data, child] = sndr; -auto& [policy, shape, f] = data; -auto new_f = [func = std::move(f)](Shape begin, Shape end, auto&&... 
vs) - noexcept(noexcept(f(begin, vs...))) { - while (begin != end) - func(begin++, vs...); -}; -return bulk_chunked(std::move(child), policy, shape, std::move(new_f)); -[ Note: This causes the bulk(sndr, policy, shape, f) sender to be expressed in terms of bulk_chunked(sndr, policy, shape, f) when it is connected to a receiver whose execution domain does not customize bulk. — end note ] - -Let sndr and env be subexpressions, let Sndr be decltype((sndr)), and let sch be expression-equivalent to get_completion_scheduler(get_env(sndr.get<2>())). If sender-for> is false, the expression bulk-algo.transform-sender(sndr, env) is ill-formed; otherwise, it is expression-equivalent to: - -(6.1)sch.bulk-transform(sndr, env) if that expression is well-formed; otherwise, - -(6.2)sch.bulk-transform(as-bulk-chunked(sndr), env) if that expression is well-formed; otherwise, - -(6.3)bulk-algo.transform-sender(sndr, env) is ill-formed. - -[ Editor's note: Change 33.9.12.12 [exec.when.all] as follows: ] - -when_all and when_all_with_variant both … as before … - -The names when_all and when_all_with_variant denote customization point objects. Let sndrs be a pack of subexpressions, and let Sndrs be a pack of the types decltype((sndrs))..., and let CD be the type common_type_t. Let CD2 be CD if CD is well-formed, and default_domain otherwise. The expressions when_all(sndrs...) and when_all_with_variant(sndrs...) are ill-formed if any of the following is true: - -(2.1)sizeof...(sndrs) is 0, or - -(2.2)(sender && ...) is false. - -The expression when_all(sndrs...) is expression-equivalent to: make-sender(when_all, {}, sndrs...). 
- -transform_sender(CD2(), make-sender(when_all, {}, sndrs...)) -The exposition-only class template impls-for (33.9.1 [exec.snd.general]) is specialized for when_all_t as follows: - -namespace std::execution { - template<> - struct impls-for : default-impls { - static constexpr auto get-attrs = see below; - static constexpr auto get-env = see below; - static constexpr auto get-state = see below; - static constexpr auto start = see below; - static constexpr auto complete = see below; - - template - static consteval void check-types(); - }; -} -… as before … - -Throws: Any exception thrown as a result of evaluating the Effects, or an exception of an unspecified type derived from exception when CD is ill-formed. -The member impls-for​::​get-attrs is initialized with a callable object equivalent to the following lambda expression: - -[](auto&&, auto&&... child) noexcept { - if constexpr (same_as) { - return env<>(); - } else { - return MAKE-ENV(get_domain, CD()); - } -} -… as before … - -The expression when_all_with_variant(sndrs...) is expression-equivalent to: make-sender(when_all_with_variant, {}, sndrs...). - -transform_sender(CD2(), make-sender(when_all_with_variant, {}, sndrs...)); -Given subexpressions sndr and env, if sender-for is false, then the expression when_all_with_variant.transform_sendertransform-sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, _, ...child] = sndr; -return when_all(into_variant(std::forward_like(child))...); -[Note 1: This causes the when_all_with_variant(sndrs...) sender to become when_all(into_variant(sndrs)...) when it is connected with a receiver whose execution domain does not customize when_all_with_variant. — end note] - -[ Editor's note: Change 33.9.12.13 [exec.into.variant] paragraph 3 as follows: ] - -Otherwise, the expression into_variant(sndr) is expression-equivalent to: make-sender(into_variant, {}, sndr). 
- -transform_sender(get-domain-early(sndr), make-sender(into_variant, {}, sndr)) -except that sndr is only evaluated once. - -[ Editor's note: Change 33.9.12.14 [exec.stopped.opt] paragraphs 2 and 4 as follows: ] - -The name stopped_as_optional denotes a pipeable sender adaptor object. For a subexpression sndr, let Sndr be decltype((sndr)). The expression stopped_as_optional(sndr) is expression-equivalent to: make-sender(stopped_as_optional, {}, sndr). - -transform_sender(get-domain-early(sndr), make-sender(stopped_as_optional, {}, sndr)) -except that sndr is only evaluated once. - -The exposition-only class template impls-for … as before … - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). If sender-for is false then the expression stopped_as_optional.transform_sendertransform-sender(sndr, env) is ill-formed; otherwise, if sender_in, FWD-ENV-T(Env)> is false, the expression stopped_as_optional.transform_sendertransform-sender(sndr, env) is equivalent to not-a-sender(); otherwise, it is equivalent to: - -auto&& [_, _, child] = sndr; -using V = single-sender-value-type, FWD-ENV-T(Env)>; -return let_stopped( - then(std::forward_like(child), - [](Ts&&... ts) noexcept(is_nothrow_constructible_v) { - return optional(in_place, std::forward(ts)...); - }), - []() noexcept { return just(optional()); }); -[ Editor's note: Change 33.9.12.15 [exec.stopped.err] paragraphs 2 and 3 as follows: ] - -The name stopped_as_error denotes a pipeable sender adaptor object. For some subexpressions sndr and err, let Sndr be decltype((sndr)) and let Err be decltype((err)). If the type Sndr does not satisfy sender or if the type Err does not satisfy movable-value, stopped_as_error(sndr, err) is ill-formed. Otherwise, the expression stopped_as_error(sndr) is expression-equivalent to: make-sender(stopped_as_error, err, sndr). 
- -transform_sender(get-domain-early(sndr), make-sender(stopped_as_error, err, sndr)) -except that sndr is only evaluated once. - -Let sndr and env be subexpressions such that Sndr is decltype((sndr)) and Env is decltype((env)). If sender-for is false then the expression stopped_as_error.transform_sendertransform-sender(sndr, env) is ill-formed; otherwise, it is equivalent to: - -auto&& [_, err, child] = sndr; -using E = decltype(auto(err)); -return let_stopped( - std::forward_like(child), - [err = std::forward_like(err)]() noexcept(is_nothrow_move_constructible_v) { - return just_error(std::move(err)); - }); -[ Editor's note: Change 33.9.12.16 [exec.associate] paragraph 10 as follows: ] - -The name associate denotes a pipeable sender adaptor object. For subexpressions sndr and token: - -(10.1)If decltype((sndr)) does not satisfy sender, or remove_cvref_t does not satisfy scope_token, then associate(sndr, token) is ill-formed. - -(10.2)Otherwise, the expression associate(sndr, token) is expression-equivalent to: make-sender(associate, associate-data(token, sndr)). - -transform_sender(get-domain-early(sndr), - make-sender(associate, associate-data(token, sndr))) -except that sndr is evaluated only once. - -[ Editor's note: Change 33.9.13.1 [exec.sync.wait] paragraphs 4 and 9 as follows: ] - -The name this_thread​::​sync_wait denotes a customization point object. For a subexpression sndr, let Sndr be decltype((sndr)). The expression this_thread​::​sync_wait(sndr) is expression-equivalent to the following, except that sndr is evaluated only once: sync_wait.apply(sndr), where apply is the exposition-only member function specified below. - -apply_sender(get-domain-early(sndr), sync_wait, sndr) -Mandates: - -(4.1)sender_in is true. - -(4.2)The type sync-wait-result-type is well-formed. - -(4.3)same_as> is true, where e is the apply_sender expression i> -… as before … - -For a subexpression sndr, let Sndr be decltype((sndr)). 
If sender_to> is false, the expression sync_wait.apply_senderapply(sndr) is ill-formed; otherwise, it is equivalent to: - -sync-wait-state state; -auto op = connect(sndr, sync-wait-receiver{&state}); -start(op); - -state.loop.run(); -if (state.error) { - rethrow_exception(std::move(state.error)); -} -return std::move(state.result); -[ Editor's note: Change Note 1 in 33.9.13.1 [exec.sync.wait] paragraph 10.1 as follows: ] - -[Note 1: The default implementation of sync_wait achieves forward progress guarantee delegation by providing a run_loop scheduler via the get_delegation_scheduler query on the sync-wait-receiver’s environment. The run_loop is driven by the current thread of execution. — end note] - -[ Editor's note: Change 33.9.13.2 [exec.sync.wait.var] paragraphs 1 and 2 as follows: ] - -The name this_thread​::​sync_wait_with_variant denotes a customization point object. For a subexpression sndr, let Sndr be decltype(into_variant(sndr)). The expression this_thread​::​sync_wait_with_variant(sndr) is expression-equivalent to the following, except sndr is evaluated only once: sync_wait_with_variant.apply(sndr), where apply is the exposition-only member function specified below. - -apply_sender(get-domain-early(sndr), sync_wait_with_variant, sndr) -Mandates: - -(1.1)sender_in is true. - -(1.2)The type sync-wait-with-variant-result-type is well-formed. 
- -(1.3)same_as> is true, where e is the apply_sender expression i> -The expression sync_wait_with_variant.apply_senderapply(sndr) is equivalent to: - -using result_type = sync-wait-with-variant-result-type; -if (auto opt_value = sync_wait(into_variant(sndr))) { - return result_type(std::move(get<0>(*opt_value))); -} -return result_type(nullopt); -[ Editor's note: Change Note 1 in 33.9.13.1 [exec.sync.wait] paragraph 10.1 as follows: ] - -[Note 1: The default implementation of sync_wait_with_variant achieves forward progress guarantee delegation (6.10.2.3 [intro.progress]) by relying on the forward progress guarantee delegation provided by sync_wait. — end note] - -[ Editor's note: Change 33.11.2 [exec.env] as follows: ] - -namespace std::execution { - template - struct env { - Envs0 envs0; // exposition only - Envs1 envs1; // exposition only - ⋮ - Envsn-1 envsn-1; // exposition only - - template - constexpr decltype(auto) query(QueryTag q, Args&&... args) const noexcept(see below); - }; - - template - env(Envs...) -> env...>; -} -The class template env is used to construct a queryable object from several queryable objects. Query invocations on the resulting object are resolved by attempting to query each subobject in lexical order. -… as before … - -template -constexpr decltype(auto) query(QueryTag q, Args&&... args) const noexcept(see below); -Let has-query be the following exposition-only concept: - -template - concept has-query = // exposition only - requires (const Env& env, Args&&... args) { - env.query(QueryTag(), std::forward(args)...); - }; -Let fe be the first element of envs0, envs1, … envsn-1 such that the expression fe.query(q, std::forward(args)...) is well-formed. - -Constraints: (has-query || ...) is true. - -Effects: Equivalent to: return fe.query(q, std::forward(args)...); - -Remarks: The expression in the noexcept clause is equivalent to noexcept(fe.query(q, std::forward(args)...)). 
- -[ Editor's note: In 33.12.1.2 [exec.run.loop.types], add a new paragraph after paragraph 4 as follows: ] - -Let sch be an expression of type run-loop-scheduler. The expression schedule(sch) has type run-loop-sender and is not potentially-throwing if sch is not potentially-throwing. -For type set-tag other than set_error_t, the expression get_completion_scheduler(get_env(schedule(sch))) == sch evaluates to true. -[ Editor's note: Change 33.13.3 [exec.affine.on] paragraph 3 as follows: ] - -Otherwise, the expression affine_on(sndr, sch) is expression-equivalent to: make-sender(affine_on, sch, sndr). - -transform_sender(get-domain-early(sndr), make-sender(affine_on, sch, sndr)) -except that sndr is evaluated only once. - -[ Editor's note: Change paragraph 3 of 33.13.4 [exec.inline.scheduler] as follows: ] - -Let sndr be an expression of type inline-sender, let rcvr be an expression such that receiver_of is true where CS is completion_signatures, then: [ Editor's note: Move the text of (3.1) below into this paragraph. ] - -(3.1)the expression connect(sndr, rcvr) has type inline-state> and is potentially-throwing if and only if ((void)sndr, auto(rcvr)) is potentially-throwing, and. - -(3.2)the expression get_completion_scheduler(get_env(sndr)) has type​ inline_scheduler and is potentially-throwing if and only if get_env(sndr) is potentially-throwing. 
- -[ Editor's note: Change 33.13.5 [exec.task.scheduler] as follows: ] - -namespace std::execution { - class task_scheduler { - class ts-sender; // exposition only - - template - class state; // exposition only - - template - class backend-for; // exposition only - public: - using scheduler_concept = scheduler_t; - - template> - requires (!same_as>) && scheduler - explicit task_scheduler(Sch&& sch, Allocator alloc = {}); - - ts-sendersee below schedule(); - - template // exposition only - see below bulk-transform(Sndr&& sndr, const Env& env); - - friend bool operator==(const task_scheduler& lhs, const task_scheduler& rhs) noexcept; - - template - requires (!same_as) && scheduler - friend bool operator==(const task_scheduler& lhs, const Sch& rhs) noexcept; - - private: - shared_ptr sch_; // exposition only - // see [exec.sysctxrepl.psb] - }; -} -task_scheduler is a class that models scheduler (33.6 [exec.sched]). Given an object s of type task_scheduler, let SCHED(s) be the sched_ member of the object owned by s.sch_. -For an lvalue r of type derived from receiver_proxy, let WRAP-RCVR(r) be an object of a type that models receiver and whose completion handlers result in invoking the corresponding completion handlers of r. -template -struct backend-for : parallel_scheduler_backend { // exposition only - explicit backend-for(Sch sch) : sched_(std::move(sch)) {} - - void schedule(receiver_proxy& r, span s) noexcept override; - void schedule_bulk_chunked(size_t shape, bulk_item_receiver_proxy& r, - span s) noexcept override; - void schedule_bulk_unchunked(size_t shape, bulk_item_receiver_proxy& r, - span s) noexcept override; - - Sch sched_; // exposition only -}; -Let sndr be a sender whose only value completion signature is set_value_t() and for which the expression get_completion_scheduler(get_env(sndr)) == sched_ is true. 
-void schedule(receiver_proxy& r, span s) noexcept override; -Effects: Constructs an operation state os with connect(schedule(sched_), WRAP-RCVR(r)) and calls start(os). -void schedule_bulk_chunked(size_t shape, bulk_item_receiver_proxy& r, - span s) noexcept override; -Effects: Let chunk_size be an integer less than or equal to shape, let num_chunks be (shape + chunk_size - 1) / chunk_size, and let fn be a function object such that for an integer i, fn(i) calls r.execute(i * chunk_size, m), where m is the lesser of (i + 1) * chunk_size and shape. Constructs an operation state os as if with connect(bulk(sndr, par, num_chunks, fn), WRAP-RCVR(r)) and calls start(os). -void schedule_bulk_unchunked(size_t shape, bulk_item_receiver_proxy& r, - span s) noexcept override; -Effects: Let fn be a function object such that for an integer i, fn(i) is equivalent to r.execute(i, i + 1). Constructs an operation state os as if with connect(bulk(sndr, par, shape, fn), WRAP-RCVR(r)) and calls start(os). -template> - requires(!same_as>) && scheduler -explicit task_scheduler(Sch&& sch, Allocator alloc = {}); -Effects: Initialize sch_ with allocate_shared>>(alloc,​ std​::​forward​(sch)). -[ Editor's note: Paragraphs 3-7 are kept unmodified. Remove paragraphs 8-12 and add the following paragraphs: ] - -see below schedule(); -Returns: a prvalue sndr whose type Sndr models sender such that: - -(8.1)get_completion_scheduler(get_env(sndr)) is equal to *this. - -(8.2)If a receiver rcvr is connected to sndr and the resulting operation state is started, calls sch_->schedule(r, s), where - -(8.2.1)r is a proxy for rcvr with base system_context_replaceability​::​receiver_proxy (33.15 [exec.par.scheduler]) and - -(8.2.2)s is a preallocated backend storage for r. - -template // exposition only - see below bulk-transform(BulkSndr&& bulk_sndr, const Env& env); -Constraints: sender_in is true and either sender-for or sender-for is true. 
- -Returns: a prvalue sndr whose type models sender such that: - -(10.1)get_completion_scheduler(get_env(sndr)) is equal to *this. - -(10.2)bulk_sndr is connected to an unspecified receiver if a receiver rcvr is connected to sndr. If the resulting operation state is started, - -(10.2.1)If bulk_sndr completes with values vals, let args be a pack of lvalue subexpressions designating objects decay-copied from vals. Then - -(10.2.1.1)If bulk_sndr is the result of calling bulk_chunked(child, policy, shape, f), sch_->schedule_bulk_chunked(shape, r, s) is called where r is a bulk chunked proxy for rcvr with callable f and arguments args, and s is a preallocated backend storage for r. - -(10.2.1.2)Otherwise, bulk_sndr is the result of calling bulk_unchunked(child, policy, shape, f). Calls sch_->schedule_bulk_unchunked(shape, r, s) where r is a bulk unchunked proxy for rcvr with callable f and arguments args, and s is a preallocated backend storage for r. - -(10.2.2)All other completion operations are forwarded unchanged. - - -[ Editor's note: In 33.15 [exec.par.scheduler], add a new paragraph after paragraph 3, another before paragraph 10, and change paragraphs 10 and 11 as follows: ] - -The expression get_forward_progress_guarantee(sch) returns forward_progress_guarantee​::​parallel. -?.The expression get_completion_scheduler(get_env(schedule(sch))) == sch evaluates to true. - -… as before … - -?.Let sch be a subexpression of type parallel_scheduler. For subexpressions sndr and env, if tag_of_t is neither bulk_chunked_t nor bulk_unchunked_t, the expression sch.bulk-transform(sndr, env) is ill-formed; otherwise, let child, pol, shape, and f be subexpressions equal to the arguments used to create sndr. - -parallel_scheduler provides a customized implementation of the bulk_chunked algorithm (33.9.12.11 [exec.bulk]). 
If a receiver rcvr is connected to the sender returned by bulk_chunked(sndr, pol, shape, f) When the tag type of sndr is bulk_chunked_t, the expression sch.bulk-transform(sndr, env) returns a sender such that if it is connected to a receiver rcvr and the resulting operation state is started, then: - -(10.1)If sndrchild completes with values vals, let args be a pack of lvalue subexpressions designating vals, then b.schedule_bulk_chunked(shape, r, s) is called, where - -(10.1.1)r is a bulk chunked proxy for rcvr with callable f and arguments args and - -(10.1.2)s is a preallocated backend storage for r. - -(10.2)All other completion operations are forwarded unchanged. - -[ Note: Customizing the behavior of bulk_chunked affects the default implementation of bulk. — end note ] - -parallel_scheduler provides a customized implementation of the bulk_unchunked algorithm (33.9.12.11 [exec.bulk]). If a receiver rcvr is connected to the sender returned by bulk_unchunked(sndr, pol, shape, f) When the tag type of sndr is bulk_unchunked_t, the expression sch.bulk-transform(sndr, env) returns a sender such that if it is connected to a receiver rcvr and the resulting operation state is started, then: - -(11.1)If sndrchild completes with values vals, let args be a pack of lvalue subexpressions designating vals, then b.schedule_bulk_unchunked(shape, r, s) is called, where - -(11.1.1)r is a bulk unchunked proxy for rcvr with callable f and arguments args and - -(11.1.2)s is a preallocated backend storage for r. - -(11.2)All other completion operations are forwarded unchanged. - -6 Appendix A: Listing for updated transform_sender -The proposal requires some changes to how transform_sender operates. This new transform_sender still accepts a sender and an environment like the current one, but it no longer accepts a domain. It computes the two domains, starting and completing, and applies the two transforms, recursing if a transform changes the type of the sender. 
- -The implementation of transform_sender might look something like this: - -template -concept same-decayed = std::same_as, std::decay_t>; - -template -struct transform-sender-recurse -{ - template - using result-t = - decltype(Domain().transform_sender(Tag(), declval(), declval())); - - constexpr transform-sender-recurse(Domain, Tag) noexcept {} - - template - decltype(auto) operator()(this auto self, Sndr&& sndr, const Env& env) - { - if constexpr (!requires { typename result-t; }) - { - // Domain does not have a transform_sender for this sndr so use default_domain instead. - return transform-sender-recurse()(forward(sndr), env); - } - else if constexpr (same-decayed>) - { - // Domain can transform the sender but its type does not change. End recursion. - return Domain().transform_sender(Tag(), std::forward(sndr), env); - } - else if constexpr (same_as) - { - // The starting domain cannot change, so recurse on Domain - return self(Domain().transform_sender(start, std::forward(sndr), env), env); - } - else - { - // The type of sndr changes after being transformed, so the type of the completion - // domain could change too. Recurse on the (possibly) new domain: - using attrs_t = env_of_t>; - using domain_t = decltype(get_completion_domain(declval(), env)); - - return transform-sender-recurse()( - Domain().transform_sender(set_value, std::forward(sndr), env), env); - } - } -}; - -template -auto transform_sender(Sndr&& sndr, const Env& env) -{ - auto starting_domain = get_domain(env); - auto complete_domain = get_completion_domain(get_env(sndr), env); - - auto starting_transform = transform-sender-recurse(starting_domain, start); - auto complete_transform = transform-sender-recurse(complete_domain, set_value); - - return starting_transform(complete_transform(std::forward(sndr), env), env); -} -With this definition of transform_sender, connect(sndr, rcvr) is equivalent to transform_sender(sndr, get_env(rcvr)).connect(rcvr), except that rcvr is evaluated only once. 
- -7 References -[P3325R1] Eric Niebler. 2024-07-14. A Utility for Creating Execution Environments. -https://wg21.link/p3325r1 -[P3388R2] Robert Leahy. 2025-04-01. When Do You Know connect Doesn’t Throw? -https://wg21.link/p3388r2 -[P3718R0] Eric Niebler. 2025-06-28. Fixing Lazy Sender Algorithm Customization, Again. -https://wg21.link/p3718r0 \ No newline at end of file diff --git a/context/run_on.md b/context/run_on.md deleted file mode 100644 index 5991f5b..0000000 --- a/context/run_on.md +++ /dev/null @@ -1,95 +0,0 @@ -# run_on Design - -## Problem - -`run_on(executor, task)` needs to: -1. Run the task on a specific executor -2. Dispatch completion back to the caller's executor -3. Handle executor lifetime safely (caller may not be a coroutine) - -## Solution: `run_on_awaitable` - -A non-coroutine awaitable struct that: -- Stores executor **by value** (safe lifetime) -- Stores the task's coroutine handle -- Directly configures the task's promise when awaited - -```cpp -template -struct run_on_awaitable -{ - E ex_; - std::coroutine_handle::promise_type> h_; - - // 2-arg: co_await from coroutine - template - std::coroutine_handle<> await_suspend(coro continuation, D const& caller_ex) - { - h_.promise().ex_ = &ex_; // Execute on our executor - h_.promise().caller_ex_ = &caller_ex; // Complete to caller - h_.promise().continuation_ = continuation; - return h_; - } - - // 1-arg: detached execution (no coroutine caller) - // Precondition: 'this' is heap-allocated - std::coroutine_handle<> await_suspend_detached() - { - h_.promise().ex_ = &ex_; - h_.promise().caller_ex_ = &ex_; - h_.promise().continuation_ = nullptr; - h_.promise().detached_cleanup_ = +[](void* p) { - delete static_cast(p); - }; - h_.promise().detached_state_ = this; - return h_; - } -}; - -template -run_on_awaitable run_on(E ex, task t) -{ - return {std::move(ex), t.release()}; -} - -template -void run_async(E ex, task t) -{ - auto* state = new run_on_awaitable{std::move(ex), t.release()}; - 
state->await_suspend_detached().resume(); -} -``` - -## Task Promise Additions - -```cpp -// In task::promise_type: -void (*detached_cleanup_)(void*) = nullptr; -void* detached_state_ = nullptr; - -// In final_suspend: if no continuation but has cleanup, call it -``` - -## Why This Design - -| Aspect | Benefit | -|--------|---------| -| No extra coroutine frame | Zero allocation overhead | -| Executor by value | Safe lifetime regardless of caller | -| `co_await` lifetime extension | Awaitable stays alive during await | -| Heap allocation for detached | Self-destructs on completion | -| Direct promise access | No `set_executor` method needed | - -## Usage - -```cpp -// Coroutine caller - executor in awaitable, lifetime extended by co_await -co_await run_on(strand, some_task()); - -// Non-coroutine caller - heap allocated, self-destructs -run_async(strand, some_task()); -``` - -## Key Insight - -The caller knows the threading context, not the I/O object. Sockets should be executor-agnostic. `run_on` lets callers inject their executor choice into any task. diff --git a/context/sans-eager.md b/context/sans-eager.md deleted file mode 100644 index c5e9ba3..0000000 --- a/context/sans-eager.md +++ /dev/null @@ -1,155 +0,0 @@ -Great question. Let me map out the I/O patterns and see what actually requires eager semantics. - -## Pattern Analysis - -### Pattern A: Fire and Forget (Server Handlers) -```cpp -// Eager: -eager_on(ex, handle_connection(sock)); - -// Lazy equivalent: -run_async(ex, handle_connection(sock)); -``` -**Verdict: ✓ Lazy works.** `run_async` is fire-and-forget by design. 
- ---- - -### Pattern B: Concurrent Fan-out, Gather Results -```cpp -// Eager: -auto a = eager_on(ex, fetch("/users")); -auto b = eager_on(ex, fetch("/posts")); // both running now -auto users = co_await a; -auto posts = co_await b; - -// Lazy equivalent: -auto [users, posts] = co_await when_all( - fetch("/users"), - fetch("/posts") -); -``` -**Verdict: ✓ Lazy works** with a `when_all` combinator. - ---- - -### Pattern C: Dynamic Fan-out (Unknown N) -```cpp -// Eager: -vector> tasks; -for (auto& url : urls) - tasks.push_back(eager_on(ex, fetch(url))); -// all running concurrently -for (auto& t : tasks) - results.push_back(co_await t); - -// Lazy equivalent: -vector> tasks; -for (auto& url : urls) - tasks.push_back(fetch(url)); -auto results = co_await when_all(std::move(tasks)); -``` -**Verdict: ✓ Lazy works** with range-accepting `when_all`. - ---- - -### Pattern D: Timeout / Racing -```cpp -// Eager: -auto op = eager_on(ex, slow_operation()); -auto timer = eager_on(ex, sleep(5s)); -// whichever completes first... - -// Lazy equivalent: -auto result = co_await when_any( - slow_operation(), - sleep(5s) -); -``` -**Verdict: ✓ Lazy works** with a `when_any` combinator. 
- ---- - -### Pattern E: Managed Connection Pool (Klemens' Example) -```cpp -// Eager - full control: -vector> connections; -while (accepting) { - auto sock = co_await listener.accept(); - connections.push_back(eager_on(ex, handle(sock))); -} -// Later: can iterate, cancel specific ones, await all for graceful shutdown -for (auto& c : connections) - c.request_stop(); -for (auto& c : connections) - co_await c; - -// Lazy approximation: -std::atomic active_count{0}; -stop_source shutdown; -while (accepting) { - auto sock = co_await listener.accept(); - ++active_count; - run_async(ex, [&]() -> task<> { - co_await handle(sock, shutdown.get_token()); - --active_count; - }()); -} -// Graceful shutdown: -shutdown.request_stop(); -while (active_count > 0) - co_await sleep(10ms); // poll (ugly) -``` -**Verdict: ⚠️ Lazy works but loses elegance.** No direct handle to individual tasks. Shutdown requires external coordination. - ---- - -### Pattern F: Start Now, Await Later (True Interleaving) -```cpp -// Eager - true concurrency within one coroutine: -auto computation = eager_on(cpu_ex, heavy_compute()); -co_await do_io_stuff(); // computation running in parallel -co_await do_more_io(); // still running -auto result = co_await computation; // get result - -// Lazy - no equivalent: -// Can't interleave a single long operation with other awaits -// Would need to restructure as when_all or use external future -``` -**Verdict: ✗ No clean lazy equivalent.** This is the one pattern that truly requires eager semantics or an external future type. - ---- - -## Summary Table - -| Pattern | Eager Needed? 
| Lazy Alternative | -|---------|---------------|------------------| -| Fire-and-forget handlers | No | `run_async` | -| Fixed concurrent fan-out | No | `when_all` | -| Dynamic concurrent fan-out | No | `when_all(range)` | -| Timeout / race | No | `when_any` | -| Managed task collection | Partial | External tracking + stop_token | -| Start-now-await-later | **Yes** | External future or restructure | - -## What You Actually Lose Without Eager - -1. **Elegant task management** - Tracking/cancelling individual tasks in a dynamic set -2. **True interleaved concurrency** - Running one thing while awaiting others in the same coroutine -3. **Compositional concurrency** - Building complex concurrent patterns without restructuring code - -## What Combinators Buy You - -With just `when_all` and `when_any`, you cover ~90% of real I/O use cases: - -```cpp -// Parallel requests with timeout: -auto [result, _] = co_await when_any( - when_all(fetch(url1), fetch(url2), fetch(url3)), - sleep(30s) -); -``` - -## Recommendation - -For a **minimal I/O library**, lazy tasks + combinators + `run_async` is sufficient. The remaining gap (Pattern F) is niche and can be filled by users bringing their own future type if needed. - -Eager tasks are a "task library" concern, not an "I/O library" concern. The I/O primitives don't care whether the task awaiting them is eager or lazy. \ No newline at end of file diff --git a/context/tls-context-design.md b/context/tls-context-design.md new file mode 100644 index 0000000..88d7d26 --- /dev/null +++ b/context/tls-context-design.md @@ -0,0 +1,159 @@ +# TLS Context Abstraction Layer: Problem Statement + +## Objective + +Design a portable C++ API for TLS context configuration that abstracts credential loading, session parameters, and certificate verification across multiple SSL/TLS backend implementations (OpenSSL, wolfSSL, mbedTLS, Schannel, Secure Transport, GnuTLS, BoringSSL, s2n). 
+ +## Scope + +The abstraction targets the **configuration phase** of TLS—specifically the SSL context object pattern common to all implementations—not the streaming I/O or handshake state machine. + +## Core Domain Concepts + +### Credential Formats + +- **X.509 certificates** — ASN.1 DER-encoded identity certificates, typically PEM-wrapped (Base64 with `-----BEGIN CERTIFICATE-----` armor) +- **Private keys** — PKCS#1 (RSA-specific), PKCS#8 (generic), SEC1 (EC-specific), PEM or DER encoded +- **Certificate chains** — Ordered intermediate CA certificates for path building +- **Trust anchors** — Root CA certificates for peer verification +- **PKCS#12/PFX** — Password-protected binary bundles containing certificate + key + chain + +### Protocol Configuration + +- **ALPN** — Application-Layer Protocol Negotiation (RFC 7301) +- **SNI** — Server Name Indication (RFC 6066) +- **Ciphersuites** — TLS 1.2 format (e.g., `ECDHE-RSA-AES128-GCM-SHA256`) and TLS 1.3 suites +- **Protocol versions** — Minimum/maximum TLS version bounds (1.2, 1.3) + +### Certificate Verification + +- **Path building** — Construct chain from leaf to trust anchor, possibly fetching missing intermediates via AIA chasing +- **Signature validation** — Each certificate's signature verified against issuer's public key +- **Validity period** — notBefore ≤ now ≤ notAfter for all certificates in chain +- **Name constraints** — Intermediate CA domain restrictions +- **Key usage / Extended key usage** — Certificate permitted for TLS server/client authentication +- **Hostname matching** — Leaf certificate SAN (or CN fallback) matches requested hostname, including wildcard handling +- **Policy validation** — Certificate policies, mappings, inhibit flags (RFC 5280) +- **SCT validation** — Certificate Transparency proof checking (RFC 6962) + +### Revocation Checking + +- **CRL** — Certificate Revocation Lists; URL in CRL Distribution Points extension; potentially large, infrequent refresh +- **OCSP** — Online 
Certificate Status Protocol; URL in Authority Information Access extension; per-certificate, smaller responses, shorter validity windows +- **OCSP stapling** — Server-side: attach pre-fetched OCSP response to handshake (RFC 6066 status_request); Client-side: require stapled response + +### Platform Keystores + +- **Windows Certificate Store** — System-managed trust anchors and credentials +- **macOS Keychain** — SecIdentityRef, SecCertificateRef +- **PKCS#11** — HSM token interface + +## Verification Architecture + +### Division of Responsibility + +| Task | Responsibility | +|------|----------------| +| Parse certificates, extract AIA/CRL/OCSP URLs | Library helpers or application | +| HTTP fetch of CRL/OCSP/intermediate certs | Application (requires HTTP client) | +| Parse CRL/OCSP responses | TLS library | +| Cryptographic signature validation | TLS library | +| Path building and policy evaluation | TLS library | +| Cache management and refresh scheduling | Application | + +### Bootstrap Model + +Initial TLS connections rely on **system trust stores** (pre-installed root CAs) with revocation checking disabled or soft-fail. 
This provides the trusted channel needed to fetch revocation data for stricter subsequent validation: + +``` +Bootstrap connection: + Trust store: system default + Revocation: disabled or soft-fail + → Sufficient for OAuth flows, OCSP fetches, CRL downloads + +Hardened connection (optional): + Trust store: system or custom + Revocation: enforced using pre-fetched data +``` + +## Required Operations + +### Credential Loading + +| Operation | Input Types | +|-----------|-------------| +| Load entity certificate | PEM blob, DER blob, file path | +| Load private key | PEM/DER blob, file path, optional passphrase | +| Load certificate chain | PEM blob (concatenated), file path | +| Set trust anchors | PEM/DER blob, file path, directory, system default | +| Load PKCS#12 bundle | Binary blob, file path, passphrase | + +### Protocol Configuration + +| Operation | Input Types | +|-----------|-------------| +| Configure ALPN | Ordered list of protocol strings | +| Set protocol version range | Min/max version enum | +| Set ciphersuites | String (OpenSSL format) or list | +| Role designation | Client context vs server context | + +### Verification Configuration + +| Operation | Input Types | +|-----------|-------------| +| Set verification mode | Enum: none, peer, require_peer | +| Set verification callback | User-defined chain validation hook | +| Set hostname for matching | String (client-side SNI and cert matching) | + +### Revocation Configuration + +| Operation | Input Types | +|-----------|-------------| +| Add CRL | DER blob, file path | +| Set OCSP staple (server) | DER blob, file path, refresh callback | +| Require OCSP staple (client) | Boolean or policy enum | +| Set revocation policy | Enum: disabled, soft_fail, hard_fail | + +## Backend Mapping + +| Backend | Context Object | Cert Loading | Verification | Revocation | +|---------|----------------|--------------|--------------|------------| +| OpenSSL/BoringSSL | `SSL_CTX` | `SSL_CTX_use_certificate` | 
`SSL_CTX_set_verify` + `X509_STORE` | `X509_STORE_add_crl`, `SSL_CTX_set_tlsext_status_*` | +| wolfSSL | `WOLFSSL_CTX` | OpenSSL-compatible shim | `wolfSSL_CTX_set_verify` | `wolfSSL_CTX_EnableOCSPStapling` | +| mbedTLS | `mbedtls_ssl_config` | `mbedtls_x509_crt_parse` | `mbedtls_x509_crt_verify` | `mbedtls_x509_crl_parse` | +| GnuTLS | `gnutls_certificate_credentials_t` | `gnutls_certificate_set_x509_key_mem` | `gnutls_certificate_verify_peers` | `gnutls_certificate_set_ocsp_status_request_file` | +| Schannel | `SCH_CREDENTIALS` | In-memory `HCERTSTORE` | Automatic via Windows crypto | `SCH_CRED_REVOCATION_CHECK_*` flags | +| Secure Transport | `SSLContextRef` | Keychain or `SecIdentityRef` | Automatic via macOS trust evaluation | System-managed | +| s2n | `s2n_config` | `s2n_config_add_cert_chain_and_key` | `s2n_config_set_verification_ca_location` | Limited | + +## Out of Scope + +- Connection-level state (handshake, read/write, shutdown) +- Async I/O integration and completion models +- Session resumption and ticket management +- Client certificate request/selection callbacks (server-side mutual TLS negotiation) +- HTTP fetching of revocation data (application responsibility, but may be provided as separate component) + +## Search Terms + +``` +portable TLS abstraction API +SSL context wrapper library +X.509 credential loading interface +cross-platform TLS configuration +backend-agnostic SSL library +TLS provider abstraction C++ +certificate verification abstraction +OCSP stapling API design +CRL distribution points fetching +``` + +## Prior Art + +- Boost.Beast/Boost.Asio SSL (OpenSSL-only, interface patterns) +- libcurl TLS backend abstraction +- Qt Network SSL abstraction (`QSslConfiguration`) +- Rust `native-tls` and `rustls` crates +- Go `crypto/tls.Config` +- .NET `SslStream` / `SslClientAuthenticationOptions` +- Python `ssl.SSLContext` \ No newline at end of file diff --git a/context/wolfssl.md b/context/wolfssl.md deleted file mode 100644 index 
a7a6c2d..0000000 --- a/context/wolfssl.md +++ /dev/null @@ -1,2158 +0,0 @@ -# wolfSSL Documentation - ---- - -## Table of Contents - -- [1. Introduction](#1-introduction) - - [1.1 Why Choose wolfSSL?](#11-why-choose-wolfssl) -- [2. Building wolfSSL](#2-building-wolfssl) - - [2.1 Getting wolfSSL Source Code](#21-getting-wolfssl-source-code) - - [2.2 Building on Unix-like Systems](#22-building-on-unix-like-systems) - - [2.3 Building on Windows](#23-building-on-windows) - - [2.4 Building in a non-standard environment](#24-building-in-a-non-standard-environment) - - [2.5 Features Defined as C Pre-processor Macro](#25-features-defined-as-c-pre-processor-macro) - - [2.6 Build Options](#26-build-options) - - [2.7 Special Math Optimization Flags](#27-special-math-optimization-flags) - - [2.8 Cross Compiling](#28-cross-compiling) - - [2.9 Building Ports](#29-building-ports) - - [2.10 Building For NXP CAAM](#210-building-for-nxp-caam) -- [3. Getting Started](#3-getting-started) - - [3.1 General Description](#31-general-description) - - [3.2 Testsuite](#32-testsuite) - - [3.3 Client Example](#33-client-example) - - [3.4 Server Example](#34-server-example) - - [3.5 EchoServer Example](#35-echoserver-example) - - [3.6 EchoClient Example](#36-echoclient-example) - - [3.7 Benchmark](#37-benchmark) - - [3.8 Changing a Client Application to Use wolfSSL](#38-changing-a-client-application-to-use-wolfssl) - - [3.9 Changing a Server Application to Use wolfSSL](#39-changing-a-server-application-to-use-wolfssl) -- [4. 
Features](#4-features) - - [4.1 Features Overview](#41-features-overview) - - [4.2 Protocol Support](#42-protocol-support) - - [4.3 Cipher Support](#43-cipher-support) - - [4.4 Hardware Accelerated Crypto](#44-hardware-accelerated-crypto) - - [4.5 SSL Inspection (Sniffer)](#45-ssl-inspection-sniffer) - - [4.6 Static Buffer Allocation Option](#46-static-buffer-allocation-option) - - [4.7 Compression](#47-compression) - - [4.8 Pre-Shared Keys](#48-pre-shared-keys) - - [4.9 Client Authentication](#49-client-authentication) - - [4.10 Server Name Indication](#410-server-name-indication) - - [4.11 Handshake Modifications](#411-handshake-modifications) - - [4.12 Truncated HMAC](#412-truncated-hmac) - - [4.13 Timing-Resistance in wolfSSL](#413-timing-resistance-in-wolfssl) - - [4.14 Fixed ABI](#414-fixed-abi) -- [5. Portability](#5-portability) - - [5.1 Abstraction Layers](#51-abstraction-layers) - - [5.2 Supported Operating Systems](#52-supported-operating-systems) - - [5.3 Supported Chipmakers](#53-supported-chipmakers) - - [5.4 C# Wrapper](#54-c-wrapper) -- [6. Callbacks](#6-callbacks) - - [6.1 HandShake Callback](#61-handshake-callback) - - [6.2 Timeout Callback](#62-timeout-callback) - - [6.3 User Atomic Record Layer Processing](#63-user-atomic-record-layer-processing) - - [6.4 Public Key Callbacks](#64-public-key-callbacks) - - [6.5 Crypto Callbacks (cryptocb)](#65-crypto-callbacks-cryptocb) -- [7. 
Keys and Certificates](#7-keys-and-certificates) - - [7.1 Supported Formats and Sizes](#71-supported-formats-and-sizes) - - [7.2 Supported Certificate Extensions](#72-supported-certificate-extensions) - - [7.3 Certificate Loading](#73-certificate-loading) - - [7.4 Certificate Chain Verification](#74-certificate-chain-verification) - - [7.5 Domain Name Check for Server Certificates](#75-domain-name-check-for-server-certificates) - - [7.6 No File System and using Certificates](#76-no-file-system-and-using-certificates) - - [7.7 Serial Number Retrieval](#77-serial-number-retrieval) - - [7.8 RSA Key Generation](#78-rsa-key-generation) - - [7.9 Certificate Generation](#79-certificate-generation) - - [7.10 Certificate Signing Request (CSR) Generation](#710-certificate-signing-request-csr-generation) - - [7.11 Convert to raw ECC key](#711-convert-to-raw-ecc-key) -- [8. Debugging](#8-debugging) - - [8.1 Debugging and Logging](#81-debugging-and-logging) - - [8.2 Error Codes](#82-error-codes) -- [9. Library Design](#9-library-design) - - [9.1 Library Headers](#91-library-headers) - - [9.2 Startup and Exit](#92-startup-and-exit) - - [9.3 Structure Usage](#93-structure-usage) - - [9.4 Thread Safety](#94-thread-safety) - - [9.5 Input and Output Buffers](#95-input-and-output-buffers) -- [10. wolfCrypt Usage Reference](#10-wolfcrypt-usage-reference) - - [10.1 Hash Functions](#101-hash-functions) - - [10.2 Keyed Hash Functions](#102-keyed-hash-functions) - - [10.3 Block Ciphers](#103-block-ciphers) - - [10.4 Stream Ciphers](#104-stream-ciphers) - - [10.5 Public Key Cryptography](#105-public-key-cryptography) -- [11. 
SSL Tutorial](#11-ssl-tutorial) - - [11.1 Introduction](#111-introduction) - - [11.2 Quick Summary of SSL/TLS](#112-quick-summary-of-ssltls) - - [11.3 Getting the Source Code](#113-getting-the-source-code) - - [11.4 Base Example Modifications](#114-base-example-modifications) - - [11.5 Building and Installing wolfSSL](#115-building-and-installing-wolfssl) - - [11.6 Initial Compilation](#116-initial-compilation) - - [11.7 Libraries](#117-libraries) - - [11.8 Headers](#118-headers) - - [11.9 Startup/Shutdown](#119-startupshutdown) - - [11.10 WOLFSSL_CTX Factory](#1110-wolfssl_ctx-factory) - - [11.11 WOLFSSL Object](#1111-wolfssl-object) - - [11.12 Sending/Receiving Data](#1112-sendingreceiving-data) - - [11.13 Signal Handling](#1113-signal-handling) - - [11.14 Certificates](#1114-certificates) - - [11.15 Conclusion](#1115-conclusion) -- [12. Best Practices for Embedded Devices](#12-best-practices-for-embedded-devices) - - [12.1 Creating Private Keys](#121-creating-private-keys) - - [12.2 Digitally Signing and Authenticating with wolfSSL](#122-digitally-signing-and-authenticating-with-wolfssl) -- [13. OpenSSL Compatibility](#13-openssl-compatibility) - - [13.1 Compatibility with OpenSSL](#131-compatibility-with-openssl) - - [13.2 Differences Between wolfSSL and OpenSSL](#132-differences-between-wolfssl-and-openssl) - - [13.3 Supported OpenSSL Structures](#133-supported-openssl-structures) - - [13.4 Supported OpenSSL Functions](#134-supported-openssl-functions) - - [13.5 x509 Certificates](#135-x509-certificates) -- [14. Licensing](#14-licensing) - - [14.1 Open Source](#141-open-source) - - [14.2 Commercial Licensing](#142-commercial-licensing) - - [14.3 FIPS 140-2/3 Validation](#143-fips-140-23-validation) - - [14.4 Support Packages](#144-support-packages) -- [15. Support and Consulting](#15-support-and-consulting) - - [15.1 How to Get Support](#151-how-to-get-support) - - [15.2 Consulting](#152-consulting) -- [16. 
wolfSSL Updates](#16-wolfssl-updates) - - [16.1 Product Release Information](#161-product-release-information) -- [Appendix A: wolfSSL API Reference](#appendix-a-wolfssl-api-reference) -- [Appendix B: wolfCrypt API Reference](#appendix-b-wolfcrypt-api-reference) -- [Appendix C: API Header Files](#appendix-c-api-header-files) -- [Appendix D: Error Codes](#appendix-d-error-codes) -- [Appendix E: Library Comparison](#appendix-e-library-comparison) -- [Appendix F: wolfSSL Release Comparison](#appendix-f-wolfssl-release-comparison) -- [Appendix G: wolfSSL Porting Guide](#appendix-g-wolfssl-porting-guide) -- [Appendix H: wolfSM (ShangMi)](#appendix-h-wolfsm-shangmi) - ---- - -# 1. Introduction - -This manual is written as a technical guide to the wolfSSL embedded SSL/TLS library. It will explain how to build and get started with wolfSSL, provide an overview of build options, features, portability enhancements, support, and much more. - -## 1.1 Why Choose wolfSSL? - -There are many reasons to choose wolfSSL as your embedded SSL solution. Some of the top reasons include size (typical footprint sizes range from 20-100 kB), support for the newest standards (SSL 3.0, TLS 1.0, TLS 1.1, TLS 1.2, TLS 1.3, DTLS 1.0, DTLS 1.2, and DTLS 1.3), current and progressive cipher support (including stream ciphers), multi-platform, royalty free, and an OpenSSL compatibility API to ease porting into existing applications which have previously used the OpenSSL package. For a complete feature list, see [Features Overview](#41-features-overview). - ---- - -# 2. Building wolfSSL - -wolfSSL was written with portability in mind and should generally be easy to build on most systems. If you have difficulty building wolfSSL, please don't hesitate to seek support through our support forums (https://www.wolfssl.com/forums) or contact us directly at support@wolfssl.com. 
- -This chapter explains how to build wolfSSL on Unix and Windows, and provides guidance for building wolfSSL in a non-standard environment. You will find the "getting started" guide in [Chapter 3](#3-getting-started) and an SSL tutorial in [Chapter 11](#11-ssl-tutorial). - -When using the autoconf / automake system to build wolfSSL, wolfSSL uses a single Makefile to build all parts and examples of the library, which is both simpler and faster than using Makefiles recursively. - -## 2.1 Getting wolfSSL Source Code - -The most recent version of wolfSSL can be downloaded from the wolfSSL website as a ZIP file: - -https://www.wolfssl.com/download - -After downloading the ZIP file, unzip the file using the `unzip` command. To use native line endings, enable the `-a` modifier when using unzip. From the unzip man page, the `-a` modifier functionality is described: - -> The -a option causes files identified by zip as text files (those with the 't' label in zipinfo listings, rather than 'b') to be automatically extracted as such, converting line endings, end-of-file characters and the character set itself as necessary. - -**NOTE:** Beginning with the release of wolfSSL 2.0.0rc3, the directory structure of wolfSSL was changed as well as the standard install location. These changes were made to make it easier for open source projects to integrate wolfSSL. For more information on header and structure changes, please see [Library Headers](#91-library-headers) and [Structure Usage](#93-structure-usage). - -## 2.2 Building on Unix-like Systems - -When building wolfSSL on Linux, *BSD, OS X, Solaris, or other *nix-like systems, use the autoconf system. To build wolfSSL you only need to run two commands from the wolfSSL root directory, `./configure` and `make`. - -The `./configure` script sets up the build environment and you can append any number of build options to `./configure`. 
For a list of available build options, please see [Build Options](#26-build-options) or run the following command: - -```bash -./configure --help -``` - -Once `./configure` has successfully executed, to build wolfSSL, run: - -```bash -make -``` - -To install wolfSSL run: - -```bash -make install -``` - -You may need superuser privileges to install, in which case precede the command with sudo: - -```bash -sudo make install -``` - -To test the build, run the testsuite program from the root wolfSSL directory: - -```bash -./testsuite/testsuite.test -``` - -Alternatively you can use autoconf to run the testsuite as well as the standard wolfSSL API and crypto tests: - -```bash -make test -``` - -If you want to build only the wolfSSL library and not the additional items (examples, testsuite, benchmark app, etc.): - -```bash -make src/libwolfssl.la -``` - -## 2.3 Building on Windows - -### 2.3.1 VS 2008 - -Solutions are included for Visual Studio 2008 in the root directory of the install. For use with Visual Studio 2010 and later, the existing project files should be able to be converted during the import process. - -### 2.3.2 VS 2010 - -You will need to download Service Pack 1 to build wolfSSL solution once it has been updated. - -### 2.3.3 VS 2013 (64 bit solution) - -You will need to download Service Pack 4 to build wolfSSL solution once it has been updated. - -**Note:** After the wolfSSL v3.8.0 release the build preprocessor macros were moved to a centralized file located at `IDE/WIN/user_settings.h`. To add features such as ECC or ChaCha20/Poly1305, add `#defines` here such as `HAVE_ECC` or `HAVE_CHACHA` / `HAVE_POLY1305`. - -### 2.3.4 Cygwin - -If building wolfSSL for Windows on a Windows development machine, we recommend using the included Visual Studio project files. 
However, if Cygwin is required, the additional packages needed include: - -- unzip -- autoconf -- automake -- gcc-core -- git -- libtool -- make - -After installing Cygwin with those packages: - -```bash -git clone https://github.com/wolfssl/wolfssl.git -cd wolfssl -./autogen.sh -./configure -make -make check -``` - -## 2.4 Building in a non-standard environment - -While not officially supported, we try to help users wishing to build wolfSSL in a non-standard environment, particularly with embedded and cross-compilation systems. Key notes: - -1. The source and header files need to remain in the same directory structure as they are in the wolfSSL download package. -2. wolfSSL header files are located in the `/wolfssl` directory. -3. wolfSSL defaults to a little endian system unless the configure process detects big endian. Define `BIG_ENDIAN_ORDER` if using a big endian system. -4. wolfSSL benefits speed-wise from having a 64-bit type available. Define `SIZEOF_LONG 8` or `SIZEOF_LONG_LONG 8` as appropriate. - -### 2.4.1 Building into Yocto Linux - -wolfSSL includes recipes for building on Yocto Linux and OpenEmbedded. These recipes are maintained within the meta-wolfSSL layer: https://github.com/wolfSSL/meta-wolfssl - -### 2.4.2 Building with Atollic TrueSTUDIO - -Versions of wolfSSL following 3.15.5 include a TrueSTUDIO project file for ARM M4-Cortex devices. - -### 2.4.3 Building with IAR - -The `/IDE/IAR-EWARM` directory contains workspace and project files for IAR Embedded Workbench. - -### 2.4.4 Building on OS X and iOS - -The `/IDE/XCODE` directory contains Xcode workspace and project files. - -### 2.4.5 Building with GCC ARM - -In the `/IDE/GCC-ARM` directory, you will find an example wolfSSL project for Cortex M series. 
- -Cross-compile example: - -```bash -./configure \ - --host=arm-none-eabi \ - CC=arm-none-eabi-gcc \ - AR=arm-none-eabi-ar \ - STRIP=arm-none-eabi-strip \ - RANLIB=arm-none-eabi-ranlib \ - --prefix=/path/to/build/wolfssl-arm \ - CFLAGS="-march=armv8-a --specs=nosys.specs \ - -DHAVE_PK_CALLBACKS -DWOLFSSL_USER_IO -DWOLFSSL_NO_SOCK -DNO_WRITEV" \ - --disable-filesystem --enable-crypttests \ - --disable-shared -make -make install -``` - -### 2.4.6 Building on Keil MDK-ARM - -Detailed instructions for building wolfSSL on Keil MDK-ARM are available on the wolfSSL website. - -## 2.5 Features Defined as C Pre-processor Macro - -### 2.5.1 Removing Features - -The following defines can be used to remove features from wolfSSL to reduce library footprint size: - -| Define | Description | -|--------|-------------| -| `NO_WOLFSSL_CLIENT` | Removes client-specific calls (server-only builds) | -| `NO_WOLFSSL_SERVER` | Removes server-specific calls | -| `NO_DES3` | Removes DES3 encryption | -| `NO_DSA` | Removes DSA support | -| `NO_ERROR_STRINGS` | Disables error strings | -| `NO_HMAC` | Removes HMAC (note: SSL/TLS depends on HMAC) | -| `NO_MD4` | Removes MD4 (broken, shouldn't be used) | -| `NO_MD5` | Removes MD5 | -| `NO_SHA` | Removes SHA-1 | -| `NO_SHA256` | Removes SHA-256 | -| `NO_PSK` | Turns off pre-shared key extension | -| `NO_PWDBASED` | Disables password-based key derivation (PBKDF1, PBKDF2) | -| `NO_RC4` | Removes ARC4 stream cipher | -| `NO_SESSION_CACHE` | Removes session cache (~3 kB savings) | -| `NO_TLS` | Turns off TLS (not recommended) | -| `SMALL_SESSION_CACHE` | Limits session cache from 33 to 6 sessions (~2.5 kB savings) | -| `NO_RSA` | Removes RSA algorithm support | -| `NO_AES` | Disables AES algorithm support | -| `NO_DH` | Disables Diffie-Hellman support | -| `WOLFCRYPT_ONLY` | Enables wolfCrypt only, disables TLS | - -### 2.5.2 Enabling Features (On by Default) - -| Define | Description | -|--------|-------------| -| `HAVE_TLS_EXTENSIONS` | TLS 
extensions support (required for most builds) | -| `HAVE_SUPPORTED_CURVES` | TLS supported curves and key share extensions | -| `HAVE_EXTENDED_MASTER` | Extended master secret PRF for TLS v1.2 and older | -| `HAVE_ENCRYPT_THEN_MAC` | Encrypt-then-mac support for block ciphers | -| `WOLFSSL_ASN_TEMPLATE` | Template-based ASN.1 processing | - -### 2.5.3 Enabling Features Disabled by Default - -| Define | Description | -|--------|-------------| -| `WOLFSSL_CERT_GEN` | Certificate generation functionality | -| `WOLFSSL_DER_LOAD` | Loading DER-formatted CA certs | -| `WOLFSSL_DTLS` | DTLS support | -| `WOLFSSL_KEY_GEN` | RSA key generation | -| `DEBUG_WOLFSSL` | Debug support | -| `HAVE_AESCCM` | AES-CCM support | -| `HAVE_AESGCM` | AES-GCM support | -| `WOLFSSL_AES_XTS` | AES-XTS support | -| `HAVE_CHACHA` | ChaCha20 support | -| `HAVE_POLY1305` | Poly1305 support | -| `HAVE_CRL` | Certificate Revocation List support | -| `HAVE_ECC` | Elliptical Curve Cryptography support | -| `HAVE_OCSP` | Online Certificate Status Protocol support | -| `OPENSSL_EXTRA` | Extended OpenSSL compatibility | -| `WOLFSSL_SHA3` | SHA3 support | -| `WOLFSSL_SHA512` | SHA-512 support | -| `WOLFSSL_TLS13` | TLS 1.3 protocol support | -| `WOLFSSL_DTLS13` | DTLS 1.3 support | - -### 2.5.4 Customizing or Porting wolfSSL - -| Define | Description | -|--------|-------------| -| `WOLFSSL_USER_SETTINGS` | Enables user-specific settings file (`user_settings.h`) | -| `WOLFSSL_USER_IO` | Custom I/O abstraction layer | -| `NO_FILESYSTEM` | Disables stdio for loading certificates/keys | -| `NO_INLINE` | Disables automatic function inlining | -| `NO_DEV_RANDOM` | Disables `/dev/random` | -| `SINGLE_THREADED` | Disables mutex usage | -| `USER_TICKS` | Custom clock tick function | -| `USE_CERT_BUFFERS_2048` | Enables 2048-bit test certificate buffers | - -### 2.5.5 Reducing Memory or Code Usage - -| Define | Description | -|--------|-------------| -| `TFM_TIMING_RESISTANT` | Reduces stack usage with fast math 
| -| `ECC_TIMING_RESISTANT` | Prevents side channel attacks in ECC | -| `WOLFSSL_SMALL_STACK` | Uses dynamic memory instead of stack | -| `ALT_ECC_SIZE` | Allocates ECC points from heap instead of stack | -| `RSA_LOW_MEM` | Disables CRT, saves memory but slower | -| `WOLFSSL_SHA3_SMALL` | Reduces SHA3 build size | -| `GCM_SMALL` | Calculates AES-GCM at runtime instead of using tables | -| `USE_SLOW_SHA` | Reduces SHA code size | -| `USE_SLOW_SHA256` | Reduces SHA-256 code size | -| `USE_SLOW_SHA512` | Reduces SHA-512 code size | - -### 2.5.6 Increasing Performance - -| Define | Description | -|--------|-------------| -| `USE_INTEL_SPEEDUP` | Intel AVX/AVX2 instructions for AES, ChaCha20, etc. | -| `WOLFSSL_AESNI` | AES-NI hardware acceleration | -| `HAVE_INTEL_RDSEED` | Intel RDSEED for DRBG seed | -| `HAVE_INTEL_RDRAND` | Intel RDRAND for random source | -| `FP_ECC` | ECC Fixed Point Cache | -| `WOLFSSL_ASYNC_CRYPT` | Asynchronous cryptography support | - -### 2.5.7 wolfSSL's Math Options - -wolfSSL has three math libraries: - -1. **Big Integer Library** (deprecated) - Most portable, written in C, uses heap memory -2. **Fast Math Library** - Uses assembly optimizations, stack-based, timing resistant with `TFM_TIMING_RESISTANT` -3. 
**Single Precision (SP) Math Library** (recommended) - Optimized for speed, always timing resistant, DO-178C certified - -**SP Math Defines:** - -| Define | Description | -|--------|-------------| -| `WOLFSSL_SP` | Enable Single Precision math | -| `WOLFSSL_SP_MATH` | SP math only (restricted algorithms) | -| `WOLFSSL_SP_MATH_ALL` | SP math with full algorithm suite | -| `WOLFSSL_SP_SMALL` | Smaller SP code, avoids large stack variables | -| `WOLFSSL_HAVE_SP_RSA` | SP RSA for 2048, 3072, 4096 bit | -| `WOLFSSL_HAVE_SP_DH` | SP DH for 2048, 3072, 4096 bit | -| `WOLFSSL_HAVE_SP_ECC` | SP ECC for SECP256R1 and SECP384R1 | - -## 2.6 Build Options - -The following are options for the `./configure` script: - -### Debug and Threading - -| Option | Description | -|--------|-------------| -| `--enable-debug` | Enable debugging support | -| `--enable-singlethread` | Single threaded mode | - -### Protocol Options - -| Option | Description | -|--------|-------------| -| `--enable-dtls` | Enable DTLS support | -| `--enable-tls13` | Enable TLS 1.3 | -| `--disable-tlsv12` | Disable TLS 1.2 | -| `--disable-oldtls` | Disable TLS < 1.2 | - -### OpenSSL Compatibility - -| Option | Description | -|--------|-------------| -| `--enable-opensslextra` | Extra OpenSSL API compatibility | -| `--enable-opensslall` | All OpenSSL API | - -### Cipher Options - -| Option | Description | -|--------|-------------| -| `--enable-aesgcm` | AES-GCM support | -| `--enable-aesccm` | AES-CCM support | -| `--enable-aesni` | Intel AES-NI support | -| `--enable-chacha` | ChaCha20 support | -| `--enable-poly1305` | Poly1305 support | -| `--enable-camellia` | Camellia support | - -### ECC Options - -| Option | Description | -|--------|-------------| -| `--enable-ecc` | ECC support | -| `--enable-curve25519` | Curve25519 support | -| `--enable-ed25519` | Ed25519 support | - -### Certificate Options - -| Option | Description | -|--------|-------------| -| `--enable-certgen` | Certificate generation | -| 
`--enable-certreq` | Certificate request generation | -| `--enable-certext` | Certificate extensions | -| `--enable-keygen` | RSA key generation | -| `--enable-ocsp` | OCSP support | -| `--enable-crl` | CRL support | - -### Math Options - -| Option | Description | -|--------|-------------| -| `--enable-fastmath` | FastMath implementation | -| `--enable-sp-math` | Single-Precision math only | -| `--enable-sp-math-all` | SP math with full algorithm suite (default) | -| `--enable-sp=OPT` | SP math for specific algorithms | - -## 2.7 Special Math Optimization Flags - -See section 2.5.8 for detailed math library options. - -## 2.8 Cross Compiling - -When cross compiling, specify the host to `./configure`: - -```bash -./configure --host=arm-linux -``` - -You may also need to specify compiler tools: - -```bash -./configure --host=arm-linux CC=arm-linux-gcc AR=arm-linux-ar RANLIB=arm-linux-ranlib -``` - -## 2.9 Building Ports - -wolfSSL has been ported to many environments. Port-specific files are in `wolfssl-X.X.X/IDE/`: - -- Arduino, LPCXPRESSO, Wiced Studio -- SGX (Windows and Linux) -- Hexagon, Hexiwear -- NetBurner M68K, Renesas -- XCode, Eclipse, Espressif -- IAR-EWARM, Kinetis Design Studio -- OpenSTM32, RISCV, Zephyr, Mynewt - ---- - -# 3. Getting Started - -## 3.1 General Description - -wolfSSL (formerly CyaSSL) is about 10 times smaller than yaSSL, and up to 20 times smaller than OpenSSL when using optimized compile options. User benchmarking reports dramatically better performance from wolfSSL vs. OpenSSL in standard SSL operations. - -## 3.2 Testsuite - -The testsuite program tests wolfSSL and wolfCrypt functionality. 
Run from the wolfSSL home directory: - -```bash -./testsuite/testsuite.test -``` - -Or with autoconf: - -```bash -make test -``` - -A successful run shows output like: - -``` ------------------------------------------------------------------------------- -wolfSSL version 4.8.1 ------------------------------------------------------------------------------- -error test passed! -MEMORY test passed! -base64 test passed! -... -SHA-256 test passed! -... -AES test passed! -AES-GCM test passed! -RSA test passed! -DH test passed! -ECC test passed! -... -Test complete -``` - -## 3.3 Client Example - -Test wolfSSL against any SSL server using the client example: - -```bash -./examples/client/client --help -``` - -Example connecting to a public server: - -```bash -./examples/client/client -h example.com -p 443 -d -g -``` - -## 3.4 Server Example - -The server example listens for client connections: - -```bash -./examples/server/server --help -``` - -Start a basic server: - -```bash -./examples/server/server -``` - -Then connect with the client: - -```bash -./examples/client/client -``` - -## 3.5 EchoServer Example - -The echoserver echoes any received data back to the client: - -```bash -./examples/echoserver/echoserver -``` - -## 3.6 EchoClient Example - -Connect to the echoserver: - -```bash -./examples/echoclient/echoclient -``` - -## 3.7 Benchmark - -The benchmark application tests wolfCrypt algorithm performance: - -```bash -./wolfcrypt/benchmark/benchmark -``` - -Example output: - -``` -wolfCrypt Benchmark (block bytes 1048576, min 1.0 sec each) -RNG 105 MiB took 1.004 seconds, 104.576 MiB/s -AES-128-CBC-enc 395 MiB took 1.001 seconds, 394.478 MiB/s -AES-128-CBC-dec 380 MiB took 1.003 seconds, 378.723 MiB/s -AES-256-CBC-enc 290 MiB took 1.002 seconds, 289.532 MiB/s -AES-256-CBC-dec 285 MiB took 1.001 seconds, 284.695 MiB/s -AES-128-GCM-enc 185 MiB took 1.009 seconds, 183.510 MiB/s -AES-128-GCM-dec 185 MiB took 1.001 seconds, 184.740 MiB/s -... 
-RSA 2048 public 6400 ops took 1.008 sec, avg 0.158 ms, 6349.206 ops/sec -RSA 2048 private 300 ops took 1.073 sec, avg 3.576 ms, 279.647 ops/sec -ECC [SECP256R1] 256 key gen 2200 ops took 1.012 sec, avg 0.460 ms, 2173.913 ops/sec -ECDHE [SECP256R1] 256 agree 2500 ops took 1.006 sec, avg 0.403 ms, 2483.559 ops/sec -ECDSA [SECP256R1] 256 sign 3100 ops took 1.001 sec, avg 0.323 ms, 3096.904 ops/sec -ECDSA [SECP256R1] 256 verify 1200 ops took 1.007 sec, avg 0.839 ms, 1191.658 ops/sec -``` - -## 3.8 Changing a Client Application to Use wolfSSL - -Basic steps to add wolfSSL to an existing client: - -1. Include the wolfSSL header: - ```c - #include - ``` - -2. Initialize wolfSSL: - ```c - wolfSSL_Init(); - ``` - -3. Create a context: - ```c - WOLFSSL_CTX* ctx = wolfSSL_CTX_new(wolfTLSv1_2_client_method()); - ``` - -4. Load CA certificates: - ```c - wolfSSL_CTX_load_verify_locations(ctx, "ca-cert.pem", NULL); - ``` - -5. Create an SSL object and associate with socket: - ```c - WOLFSSL* ssl = wolfSSL_new(ctx); - wolfSSL_set_fd(ssl, sockfd); - ``` - -6. Perform SSL handshake: - ```c - wolfSSL_connect(ssl); - ``` - -7. Send/receive data: - ```c - wolfSSL_write(ssl, data, len); - wolfSSL_read(ssl, buffer, sizeof(buffer)); - ``` - -8. Cleanup: - ```c - wolfSSL_free(ssl); - wolfSSL_CTX_free(ctx); - wolfSSL_Cleanup(); - ``` - -## 3.9 Changing a Server Application to Use wolfSSL - -Similar to the client, but use server methods and load server certificates: - -```c -WOLFSSL_CTX* ctx = wolfSSL_CTX_new(wolfTLSv1_2_server_method()); -wolfSSL_CTX_use_certificate_file(ctx, "server-cert.pem", SSL_FILETYPE_PEM); -wolfSSL_CTX_use_PrivateKey_file(ctx, "server-key.pem", SSL_FILETYPE_PEM); - -// After accept() -WOLFSSL* ssl = wolfSSL_new(ctx); -wolfSSL_set_fd(ssl, clientfd); -wolfSSL_accept(ssl); -``` - ---- - -# 4. 
Features - -wolfSSL supports the C programming language as a primary interface, but also supports several other host languages, including Java, PHP, Perl, and Python (through a SWIG interface). - -## 4.1 Features Overview - -For an overview of wolfSSL features, please reference the wolfSSL product webpage: https://www.wolfssl.com/products/wolfssl - -## 4.2 Protocol Support - -wolfSSL supports SSL 3.0, TLS (1.0, 1.1, 1.2, 1.3), and DTLS (1.0, 1.2, 1.3). wolfSSL does not support SSL 2.0, as it has been insecure for several years. - -### 4.2.1 Server Functions - -| Function | Protocol | -|----------|----------| -| `wolfDTLSv1_server_method()` | DTLS 1.0 | -| `wolfDTLSv1_2_server_method()` | DTLS 1.2 | -| `wolfSSLv3_server_method()` | SSL 3.0 | -| `wolfTLSv1_server_method()` | TLS 1.0 | -| `wolfTLSv1_1_server_method()` | TLS 1.1 | -| `wolfTLSv1_2_server_method()` | TLS 1.2 | -| `wolfTLSv1_3_server_method()` | TLS 1.3 | -| `wolfSSLv23_server_method()` | SSLv3 - TLS 1.2 (highest available) | - -### 4.2.2 Client Functions - -| Function | Protocol | -|----------|----------| -| `wolfDTLSv1_client_method()` | DTLS 1.0 | -| `wolfDTLSv1_2_client_method_ex()` | DTLS 1.2 | -| `wolfSSLv3_client_method()` | SSL 3.0 | -| `wolfTLSv1_client_method()` | TLS 1.0 | -| `wolfTLSv1_1_client_method()` | TLS 1.1 | -| `wolfTLSv1_2_client_method()` | TLS 1.2 | -| `wolfTLSv1_3_client_method()` | TLS 1.3 | -| `wolfSSLv23_client_method()` | SSLv3 - TLS 1.2 (highest available) | - -### 4.2.3 Robust Client and Server Downgrade - -Both wolfSSL clients and servers have robust version downgrade capability. Using `wolfSSLv23_client_method()` or `wolfSSLv23_server_method()` allows negotiation to the highest protocol version supported by both sides. - -### 4.2.4 IPv6 Support - -wolfSSL supports IPv6. The library was designed as IP neutral and will work with both IPv4 and IPv6. 
To enable IPv6 in test applications: - -```bash -./configure --enable-ipv6 -``` - -### 4.2.5 DTLS - -wolfSSL has support for DTLS ("Datagram" TLS) for both client and server. Supported versions are DTLS 1.0, 1.2, and 1.3. - -```bash -./configure --enable-dtls -``` - -### 4.2.6 TLS Extensions - -| RFC | Extension | wolfSSL Type | -|-----|-----------|--------------| -| 6066 | Server Name Indication | `TLSX_SERVER_NAME` | -| 6066 | Maximum Fragment Length | `TLSX_MAX_FRAGMENT_LENGTH` | -| 6066 | Truncated HMAC | `TLSX_TRUNCATED_HMAC` | -| 6066 | Status Request | `TLSX_STATUS_REQUEST` | -| 7919 | Supported Groups | `TLSX_SUPPORTED_GROUPS` | -| 5246 | Signature Algorithm | `TLSX_SIGNATURE_ALGORITHMS` | -| 7301 | ALPN | `TLSX_APPLICATION_LAYER_PROTOCOL` | -| 5077 | Session Ticket | `TLSX_SESSION_TICKET` | -| 5746 | Renegotiation Indication | `TLSX_RENEGOTIATION_INFO` | -| 8446 | Key Share | `TLSX_KEY_SHARE` | -| 8446 | Pre Shared Key | `TLSX_PRE_SHARED_KEY` | -| 8446 | PSK Key Exchange Modes | `TLSX_PSK_KEY_EXCHANGE_MODES` | -| 8446 | Early Data | `TLSX_EARLY_DATA` | -| 8446 | Supported Versions | `TLSX_SUPPORTED_VERSIONS` | - -## 4.3 Cipher Support - -### 4.3.1 Cipher Suite Strength - -Cipher suite strength classification based on symmetric encryption algorithm: - -- **LOW** - bits of security smaller than 128 bits -- **MEDIUM** - bits of security equal to 128 bits -- **HIGH** - bits of security larger than 128 bits - -### 4.3.2 Supported Cipher Suites - -**ECC cipher suites:** -- `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA` -- `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA` -- `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA` -- `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA` -- `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256` -- `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256` -- `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384` -- `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384` - -**AES-GCM cipher suites:** -- `TLS_RSA_WITH_AES_128_GCM_SHA256` -- `TLS_RSA_WITH_AES_256_GCM_SHA384` -- `TLS_DHE_RSA_WITH_AES_128_GCM_SHA256` -- 
`TLS_DHE_RSA_WITH_AES_256_GCM_SHA384` -- `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256` -- `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384` -- `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256` -- `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384` - -**ChaCha cipher suites:** -- `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256` -- `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256` -- `TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256` - -**PSK cipher suites:** -- `TLS_PSK_WITH_AES_256_CBC_SHA` -- `TLS_PSK_WITH_AES_128_CBC_SHA256` -- `TLS_PSK_WITH_AES_128_GCM_SHA256` -- `TLS_PSK_WITH_AES_256_GCM_SHA384` -- `TLS_PSK_WITH_CHACHA20_POLY1305_SHA256` - -### 4.3.3 AEAD Suites - -wolfSSL supports AEAD suites including AES-GCM, AES-CCM, and CHACHA-POLY1305. These authenticate encrypted data with any additional cleartext data, helping mitigate man-in-the-middle attacks. - -### 4.3.4 Block and Stream Ciphers - -- **Block ciphers:** AES, DES, 3DES, Camellia -- **Stream ciphers:** RC4, ChaCha20 - -AES modes: CBC (default), GCM (`--enable-aesgcm`), CCM (`--enable-aesccm`) - -### 4.3.5 Hashing Functions - -Supported hash functions: MD2, MD4, MD5, SHA-1, SHA-2 (SHA-224, SHA-256, SHA-384, SHA-512), SHA-3, BLAKE2, RIPEMD-160. - -### 4.3.6 Public Key Options - -wolfSSL supports RSA, ECC, DSA/DSS, and DH public key options, with support for EDH (Ephemeral Diffie-Hellman). - -**Post-Quantum Algorithms:** -- **ML-KEM** (Module Lattice Key Encapsulation Mechanism) - NIST FIPS 203 -- **ML-DSA** (Module Lattice Digital Signature Algorithm) - NIST FIPS 204 - -### 4.3.7 ECC Support - -Enable ECC with: - -```bash -./configure --enable-ecc -``` - -Example server with ECDHE-ECDSA: - -```bash -./examples/server/server -d -l ECDHE-ECDSA-AES256-SHA -c ./certs/server-ecc.pem -k ./certs/ecc-key.pem -``` - -### 4.3.8 PKCS Support - -wolfSSL supports PKCS #1, #3, #5, #7, #8, #9, #10, #11, and #12. 
- -**PKCS #5 (PBKDF):** - -```c -int PBKDF2(byte* output, const byte* passwd, int pLen, - const byte* salt, int sLen, int iterations, - int kLen, int hashType); -``` - -Enable with `--enable-pwdbased`. - -**PKCS #7:** - -Enable with `--enable-pkcs7`. Supports: -- Degenerate bundles -- KARI, KEKRI, PWRI, ORI, KTRI bundles -- Detached signatures -- Compressed and Firmware package bundles - -### 4.3.9 Forcing a Specific Cipher - -```c -ctx = wolfSSL_CTX_new(method); -wolfSSL_CTX_set_cipher_list(ctx, "AES128-SHA"); -``` - -## 4.4 Hardware Accelerated Crypto - -### 4.4.1 AES-NI - -Intel AES-NI provides 5-10x faster AES operations: - -```bash -./configure --enable-aesni -``` - -AES-NI accelerates: AES-CBC, AES-GCM, AES-CCM-8, AES-CCM, and AES-CTR. - -### 4.4.2 STM32F2 - -Enable with `WOLFSSL_STM32F2` define. Supports hardware crypto and RNG. - -### 4.4.3 Cavium NITROX - -```bash -./configure --with-cavium=/home/user/cavium/software -``` - -Supports RNG, AES, 3DES, RC4, HMAC, and RSA. - -### 4.4.4 ESP32-WROOM-32 - -Enable with `WOLFSSL_ESPWROOM32`. Supports RNG, AES, SHA, and RSA. - -### 4.4.5 EFR32 - -Enable with `WOLFSSL_SILABS_SE_ACCEL`. Supports RNG, AES-CBC, AES-GCM, AES-CCM, SHA-1, SHA-2, ECDHE, and ECDSA. - -## 4.5 SSL Inspection (Sniffer) - -Build with sniffer support: - -```bash -./configure --enable-sniffer -``` - -Key functions in `sniffer.h`: -- `ssl_SetPrivateKey` - Sets private key for server/port -- `ssl_DecodePacket` - Decodes TCP/IP packet -- `ssl_Trace` - Enables/disables debug tracing -- `ssl_InitSniffer` - Initialize sniffer -- `ssl_FreeSniffer` - Free sniffer -- `ssl_EnableRecovery` - Handle lost packets - -**Note:** The sniffer can only decode streams encrypted with AES-CBC, DES3-CBC, ARC4, and Camellia-CBC. ECDHE or DHE key exchange cannot be sniffed. - -## 4.6 Static Buffer Allocation Option - -wolfSSL can be configured without dynamic memory allocation for environments without malloc/free or safety-critical applications. 
- -### 4.6.1 Enabling Static Buffer Allocation - -```bash -./configure --enable-staticmemory -``` - -Or in `user_settings.h`: - -```c -#define WOLFSSL_STATIC_MEMORY -#define WOLFSSL_NO_MALLOC // if no malloc available -``` - -### 4.6.2 Using Static Buffer Allocation - -```c -WOLFSSL_CTX* ctx = NULL; -unsigned char GEN_MEM[GEN_MEM_SIZE]; -unsigned char IO_MEM[IO_MEM_SIZE]; - -// First call: set up general-purpose buffer and generate WOLFSSL_CTX -wolfSSL_CTX_load_static_memory( - &ctx, - wolfSSLv23_client_method_ex(), - GEN_MEM, GEN_MEM_SIZE, - WOLFMEM_GENERAL, - MAX_CONCURRENT_TLS); - -// Second call: set up I/O buffer -wolfSSL_CTX_load_static_memory( - &ctx, - NULL, - IO_MEM, IO_MEM_SIZE, - WOLFMEM_IO_FIXED, - MAX_CONCURRENT_IO); -``` - -## 4.7 Compression - -wolfSSL supports data compression with zlib. Define `HAVE_LIBZ` and use: - -```c -wolfSSL_set_compression(ssl); // before connect/accept -``` - -Both client and server must have compression enabled. - -## 4.8 Pre-Shared Keys - -Supported PSK cipher suites include: -- `TLS_PSK_WITH_AES_256_CBC_SHA` -- `TLS_PSK_WITH_AES_128_GCM_SHA256` -- `TLS_PSK_WITH_CHACHA20_POLY1305` -- `ECDHE-PSK-AES128-CBC-SHA256` -- `DHE-PSK-AES256-GCM-SHA384` - -**Client setup:** - -```c -wolfSSL_CTX_set_psk_client_callback(ctx, my_psk_client_cb); -``` - -**Server setup:** - -```c -wolfSSL_CTX_set_psk_server_callback(ctx, my_psk_server_cb); -wolfSSL_CTX_use_psk_identity_hint(ctx, "wolfssl server"); -``` - -## 4.9 Client Authentication - -To require client certificates on the server: - -```c -wolfSSL_CTX_load_verify_locations(ctx, caCert, 0); -wolfSSL_CTX_set_verify(ctx, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, 0); -``` - -## 4.10 Server Name Indication (SNI) - -Enable SNI: - -```bash -./configure --enable-sni -``` - -**Client side:** - -```c -wolfSSL_CTX_UseSNI(ctx, WOLFSSL_SNI_HOST_NAME, "example.com", strlen("example.com")); -// or per-session: -wolfSSL_UseSNI(ssl, WOLFSSL_SNI_HOST_NAME, "example.com", strlen("example.com")); 
-``` - -## 4.11 Handshake Modifications - -### 4.11.1 Grouping Handshake Messages - -```c -wolfSSL_CTX_set_group_messages(ctx); -``` - -## 4.12 Truncated HMAC - -Enable truncated (80-bit) HMAC for bandwidth-constrained environments: - -```bash -./configure --enable-truncatedhmac -``` - -```c -wolfSSL_CTX_UseTruncatedHMAC(ctx); -// or per-session: -wolfSSL_UseTruncatedHMAC(ssl); -``` - -## 4.13 Timing-Resistance - -wolfSSL provides timing-resistant implementations to prevent side-channel attacks: - -- `ConstantCompare` - constant-time comparison -- `ECC_TIMING_RESISTANT` - ECC timing resistance -- `TFM_TIMING_RESISTANT` - RSA timing resistance -- `WC_RSA_BLINDING` - RSA blinding mode - -Enable with `--enable-harden`. - -## 4.14 Fixed ABI - -Starting with wolfSSL v4.3.0, a subset of API functions have a fixed ABI for binary compatibility across releases: - -- `wolfSSL_Init()`, `wolfSSL_Cleanup()` -- `wolfSSL_CTX_new()`, `wolfSSL_CTX_free()` -- `wolfSSL_new()`, `wolfSSL_free()` -- `wolfSSL_connect()`, `wolfSSL_accept()` -- `wolfSSL_read()`, `wolfSSL_write()` -- `wolfSSL_set_fd()`, `wolfSSL_get_error()` -- And many more... - ---- - -# 5. Portability - -## 5.1 Abstraction Layers - -### 5.1.1 C Standard Library Abstraction Layer - -wolfSSL can be built without the C standard library for maximum portability. - -**Memory:** - -wolfSSL uses `XMALLOC()` and `XFREE()` instead of malloc/free. Define `XMALLOC_USER` to provide custom implementations: - -```c -int wolfSSL_SetAllocators(wolfSSL_Malloc_cb malloc_function, - wolfSSL_Free_cb free_function, - wolfSSL_Realloc_cb realloc_function); -``` - -**String functions:** - -Define `STRING_USER` to override `XMEMCPY()`, `XMEMSET()`, `XMEMCMP()`, etc. - -**File System:** - -Define `NO_FILESYSTEM` to disable file system usage, or use `XFILE()` layer for custom file operations. 
- -### 5.1.2 Custom Input/Output Abstraction Layer - -For custom I/O or non-TCP transport: - -```c -typedef int (*CallbackIORecv)(WOLFSSL *ssl, char *buf, int sz, void *ctx); -typedef int (*CallbackIOSend)(WOLFSSL *ssl, char *buf, int sz, void *ctx); - -wolfSSL_SetIORecv(ctx, my_recv_callback); -wolfSSL_SetIOSend(ctx, my_send_callback); - -// Set context per session -wolfSSL_SetIOReadCtx(ssl, my_context); -wolfSSL_SetIOWriteCtx(ssl, my_context); -``` - -Define `WOLFSSL_USER_IO` to disable default I/O functions. - -### 5.1.3 Operating System Abstraction Layer - -OS-specific defines are in `wolfssl/wolfcrypt/types.h` and `wolfssl/internal.h`. - -## 5.2 Supported Operating Systems - -- Windows (Win32/64, WinCE) -- Linux (embedded, Yocto, OpenEmbedded, OpenWRT) -- Mac OS X, iOS -- FreeBSD, NetBSD, OpenBSD -- Android -- ThreadX, VxWorks, QNX -- FreeRTOS, SafeRTOS -- Micrium µC/OS-III -- TRON/ITRON/µITRON -- Nucleus, TinyOS -- Solaris, HP/UX, AIX -- Haiku -- Nintendo Wii/Gamecube -- And many more... - -## 5.3 Supported Chipmakers - -wolfSSL has been ported to chips from: - -- ARM -- Intel -- AMD -- Motorola -- Texas Instruments -- STMicroelectronics -- NXP/Freescale -- Analog Devices -- Microchip -- Atmel -- Marvell -- Silicon Labs -- Espressif -- Nordic Semiconductor -- Renesas -- Infineon -- And many more... - -## 5.4 C# Wrapper - -wolfSSL provides a C# wrapper for .NET applications. See the wolfSSL C# wrapper documentation for details. - ---- - -# 6. Callbacks - -wolfSSL provides several callback options for customization. 
- -## 6.1 Handshake Callback - -```c -typedef int (*HandShakeCallBack)(HandShakeInfo*); - -int wolfSSL_CTX_SetHandShakeCallback(WOLFSSL_CTX* ctx, HandShakeCallBack cb); -``` - -## 6.2 Timeout Callback - -For non-blocking sockets with custom timeout handling: - -```c -typedef int (*TimeoutCallBack)(TimeoutInfo*); - -int wolfSSL_CTX_SetTimeoutCallback(WOLFSSL_CTX* ctx, TimeoutCallBack cb); -``` - -## 6.3 Verification Callback - -Custom certificate verification: - -```c -typedef int (*VerifyCallback)(int, WOLFSSL_X509_STORE_CTX*); - -void wolfSSL_CTX_set_verify(WOLFSSL_CTX* ctx, int mode, VerifyCallback cb); -``` - -## 6.4 I/O Callbacks - -See section 5.1.2 for custom I/O callbacks. - -## 6.5 Public Key Callbacks - -For hardware crypto or custom key operations: - -```c -wolfSSL_CTX_SetEccSignCb(ctx, my_ecc_sign_cb); -wolfSSL_CTX_SetEccVerifyCb(ctx, my_ecc_verify_cb); -wolfSSL_CTX_SetRsaSignCb(ctx, my_rsa_sign_cb); -wolfSSL_CTX_SetRsaVerifyCb(ctx, my_rsa_verify_cb); -wolfSSL_CTX_SetRsaEncCb(ctx, my_rsa_enc_cb); -wolfSSL_CTX_SetRsaDecCb(ctx, my_rsa_dec_cb); -``` - -## 6.6 Atomic Record Layer Callbacks - -For custom MAC/encrypt and decrypt/verify: - -```c -wolfSSL_CTX_SetMacEncryptCb(ctx, my_mac_encrypt_cb); -wolfSSL_CTX_SetDecryptVerifyCb(ctx, my_decrypt_verify_cb); -``` - -## 6.7 Crypto Callbacks (cryptocb) - -The crypto callback framework enables users to override default cryptographic algorithm implementations at runtime. - -### 6.7.1 Enabling Crypto Callbacks - -```bash -./configure --enable-cryptocb -``` - -Or define `WOLF_CRYPTO_CB`. 
- -### 6.7.2 Registering a Callback - -```c -typedef int (*CryptoDevCallbackFunc)(int devId, wc_CryptoInfo* info, void* ctx); - -wc_CryptoCb_RegisterDevice(devId, my_crypto_callback, my_context); -``` - -### 6.7.3 Using with TLS - -```c -wolfSSL_CTX_SetDevId(ctx, devId); -wolfSSL_SetDevId(ssl, devId); -``` - -### 6.7.4 Callback Implementation - -```c -int myCryptoCallback(int devId, wc_CryptoInfo* info, void* ctx) -{ - int ret = CRYPTOCB_UNAVAILABLE; // Fall back to default - - switch (info->algo_type) { - case WC_ALGO_TYPE_PK: - // Handle public key operations - break; - case WC_ALGO_TYPE_HASH: - // Handle hashing - break; - case WC_ALGO_TYPE_CIPHER: - // Handle cipher operations - break; - case WC_ALGO_TYPE_RNG: - // Handle RNG - break; - } - return ret; -} -``` - ---- - -# 7. Keys and Certificates - -## 7.1 Supported Formats and Sizes - -wolfSSL supports: -- **PEM** - Base64 encoded ASCII (`.pem`, `.crt`, `.cer`, `.key`) -- **DER** - Binary ASN.1 encoding (`.der`, `.cer`) -- **PKCS#8** - Private key format (with PKCS#5 or PKCS#12 encryption) - -## 7.2 Supported Certificate Extensions - -| Extension | Supported | -|-----------|-----------| -| Authority Key Identifier | Yes | -| Subject Key Identifier | Yes | -| Key Usage | Yes | -| Certificate Policies | Yes | -| Subject Alternative Name | Yes | -| Basic Constraints | Yes | -| Name Constraints | Yes | -| Extended Key Usage | Yes | -| CRL Distribution Points | Yes | -| Custom OID | Yes | - -### 7.2.1 Subject Alternative Names - -Supported types: email, DNS name, IP address, URI. - -### 7.2.2 Key Usage - -Supported: digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment, keyAgreement, keyCertSign, cRLSign, encipherOnly, decipherOnly. - -### 7.2.3 Extended Key Usage - -Supported: anyExtendedKeyUsage, serverAuth, clientAuth, codeSigning, emailProtection, timeStamping, OCSPSigning. 
- -## 7.3 Certificate Loading - -### 7.3.1 Loading CA Certificates - -```c -wolfSSL_CTX_load_verify_locations(ctx, "ca-cert.pem", NULL); -``` - -**Note:** Load certificates in order of trust: ROOT CA first, then intermediates. - -### 7.3.2 Loading Client/Server Certificates - -```c -// Single certificate -wolfSSL_CTX_use_certificate_file(ctx, "server-cert.pem", SSL_FILETYPE_PEM); - -// Certificate chain (subject cert first, then intermediates, root last) -wolfSSL_CTX_use_certificate_chain_file(ctx, "server-chain.pem"); -``` - -### 7.3.3 Loading Private Keys - -```c -wolfSSL_CTX_use_PrivateKey_file(ctx, "server-key.pem", SSL_FILETYPE_PEM); -``` - -### 7.3.4 Loading Trusted Peer Certificates - -```c -wolfSSL_CTX_trust_peer_cert(ctx, "peer-cert.pem", SSL_FILETYPE_PEM); -``` - -## 7.4 Certificate Chain Verification - -wolfSSL only requires the root certificate to be loaded as trusted to verify an entire chain. - -## 7.5 Domain Name Check - -```c -wolfSSL_check_domain_name(ssl, "example.com"); -``` - -Returns `DOMAIN_NAME_MISMATCH` if verification fails. - -## 7.6 No File System (Buffer Loading) - -Define `NO_FILESYSTEM` and use buffer functions: - -```c -wolfSSL_CTX_load_verify_buffer(ctx, cert_buffer, cert_size, SSL_FILETYPE_PEM); -wolfSSL_CTX_use_certificate_buffer(ctx, cert_buffer, cert_size, SSL_FILETYPE_PEM); -wolfSSL_CTX_use_PrivateKey_buffer(ctx, key_buffer, key_size, SSL_FILETYPE_PEM); -wolfSSL_CTX_use_certificate_chain_buffer(ctx, chain_buffer, chain_size); -``` - -## 7.7 RSA Key Generation - -Enable with `--enable-keygen` or `WOLFSSL_KEY_GEN`. 
- -```c -RsaKey genKey; -RNG rng; - -wc_InitRng(&rng); -wc_InitRsaKey(&genKey, NULL); -wc_MakeRsaKey(&genKey, 2048, 65537, &rng); - -// Export to DER -byte der[4096]; -int derSz = wc_RsaKeyToDer(&genKey, der, sizeof(der)); - -// Export to PEM -byte pem[4096]; -int pemSz = wc_DerToPem(der, derSz, pem, sizeof(pem), PRIVATEKEY_TYPE); -``` - -## 7.8 Certificate Generation - -Enable with `--enable-certgen` or `WOLFSSL_CERT_GEN`. - -```c -Cert myCert; -wc_InitCert(&myCert); - -// Set subject info -strncpy(myCert.subject.country, "US", CTC_NAME_SIZE); -strncpy(myCert.subject.state, "OR", CTC_NAME_SIZE); -strncpy(myCert.subject.org, "MyOrg", CTC_NAME_SIZE); -strncpy(myCert.subject.commonName, "www.example.com", CTC_NAME_SIZE); - -// Self-signed certificate -byte derCert[4096]; -int certSz = wc_MakeSelfCert(&myCert, derCert, sizeof(derCert), &key, &rng); - -// CA-signed certificate -wc_SetIssuer(&myCert, "ca-cert.pem"); -certSz = wc_MakeCert(&myCert, derCert, sizeof(derCert), &key, NULL, &rng); -certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, sizeof(derCert), &caKey, NULL, &rng); -``` - -## 7.9 Certificate Signing Request (CSR) Generation - -Enable with `--enable-certreq --enable-certgen`. - -```c -Cert request; -wc_InitCert(&request); - -// Set subject info... - -byte der[4096]; -int ret = wc_MakeCertReq(&request, der, sizeof(der), NULL, &key); -request.sigType = CTC_SHA256wECDSA; -int derSz = wc_SignCert(request.bodySz, request.sigType, der, sizeof(der), NULL, &key, &rng); - -byte pem[4096]; -wc_DerToPem(der, derSz, pem, sizeof(pem), CERTREQ_TYPE); -``` - ---- - -# 8. Debugging - -## 8.1 Debugging and Logging - -Enable debug output: - -```c -wolfSSL_Debugging_ON(); -wolfSSL_Debugging_OFF(); -``` - -Requires building with `DEBUG_WOLFSSL` defined. 
- -### 8.1.1 Custom Logging Callback - -```c -typedef void (*wolfSSL_Logging_cb)(const int logLevel, const char* const logMessage); - -wolfSSL_SetLoggingCb(my_logging_function); -``` - -## 8.2 Error Codes - -```c -int err = wolfSSL_get_error(ssl, result); - -char errorString[80]; -wolfSSL_ERR_error_string(err, errorString); -``` - -For non-blocking sockets, test for `SSL_ERROR_WANT_READ` or `SSL_ERROR_WANT_WRITE`. - ---- - -# 9. Library Design - -## 9.1 Library Headers - -```c -// Native wolfSSL API -#include - -// OpenSSL Compatibility Layer -#include -``` - -## 9.2 Startup and Exit - -```c -wolfSSL_Init(); // Call at startup -wolfSSL_Cleanup(); // Call at shutdown -``` - -## 9.3 Structure Usage - -| Native API | OpenSSL Compat | -|------------|----------------| -| `WOLFSSL` | `SSL` | -| `WOLFSSL_CTX` | `SSL_CTX` | -| `WOLFSSL_METHOD` | `SSL_METHOD` | -| `WOLFSSL_SESSION` | `SSL_SESSION` | -| `WOLFSSL_X509` | `X509` | - -## 9.4 Thread Safety - -wolfSSL is thread-safe by design: - -1. **WOLFSSL objects** - Can be shared across threads but access must be synchronized -2. **WOLFSSL_CTX** - Initialize completely before creating WOLFSSL objects -3. **ECC cache** - Call `wc_ecc_fp_free()` before thread exit if using fixed-point ECC - -## 9.5 Input and Output Buffers - -- Default buffer size: controlled by `RECORD_SIZE` (default 128 bytes) -- Maximum: `MAX_RECORD_SIZE` (16,384 bytes) -- Define `LARGE_STATIC_BUFFERS` for 16KB static buffers -- Define `STATIC_CHUNKS_ONLY` to limit writes to `RECORD_SIZE` - ---- - -# 10. wolfCrypt Usage Reference - -## 10.1 Hash Functions - -### 10.1.1 SHA Family - -```c -#include - -Sha256 sha; -byte hash[SHA256_DIGEST_SIZE]; -byte data[1024]; - -wc_InitSha256(&sha); -wc_Sha256Update(&sha, data, sizeof(data)); -wc_Sha256Final(&sha, hash); -``` - -Similar API for SHA, SHA224, SHA384, SHA512. 
- -### 10.1.2 BLAKE2b - -```c -#include - -Blake2b b2b; -byte digest[64]; - -wc_InitBlake2b(&b2b, 64); -wc_Blake2bUpdate(&b2b, input, inputLen); -wc_Blake2bFinal(&b2b, digest, 64); -``` - -## 10.2 Keyed Hash Functions - -### 10.2.1 HMAC - -```c -#include - -Hmac hmac; -byte key[32]; -byte digest[SHA256_DIGEST_SIZE]; - -wc_HmacSetKey(&hmac, SHA256, key, sizeof(key)); -wc_HmacUpdate(&hmac, data, dataLen); -wc_HmacFinal(&hmac, digest); -``` - -### 10.2.2 Poly1305 - -```c -#include - -Poly1305 pmac; -byte key[32]; -byte digest[16]; - -wc_Poly1305SetKey(&pmac, key, sizeof(key)); -wc_Poly1305Update(&pmac, data, dataLen); -wc_Poly1305Final(&pmac, digest); -``` - -## 10.3 Block Ciphers - -### 10.3.1 AES - -```c -#include - -Aes aes; -byte key[32]; -byte iv[16]; -byte plain[32], cipher[32]; - -wc_AesInit(&aes, NULL, INVALID_DEVID); -wc_AesSetKey(&aes, key, sizeof(key), iv, AES_ENCRYPTION); -wc_AesCbcEncrypt(&aes, cipher, plain, sizeof(plain)); - -wc_AesSetKey(&aes, key, sizeof(key), iv, AES_DECRYPTION); -wc_AesCbcDecrypt(&aes, plain, cipher, sizeof(cipher)); -``` - -**AES-GCM:** - -```c -wc_AesGcmSetKey(&aes, key, keyLen); -wc_AesGcmEncrypt(&aes, cipher, plain, plainLen, iv, ivLen, tag, tagLen, aad, aadLen); -wc_AesGcmDecrypt(&aes, plain, cipher, cipherLen, iv, ivLen, tag, tagLen, aad, aadLen); -``` - -### 10.3.2 3DES - -```c -#include - -Des3 des3; -byte key[24], iv[8]; - -wc_Des3_SetKey(&des3, key, iv, DES_ENCRYPTION); -wc_Des3_CbcEncrypt(&des3, cipher, plain, sizeof(plain)); -``` - -## 10.4 Stream Ciphers - -### 10.4.1 ChaCha20 - -```c -#include - -ChaCha cha; -byte key[32], iv[12]; - -wc_Chacha_SetKey(&cha, key, sizeof(key)); -wc_Chacha_SetIV(&cha, iv, 0); -wc_Chacha_Process(&cha, cipher, plain, sizeof(plain)); -``` - -## 10.5 Public Key Cryptography - -### 10.5.1 RSA - -```c -#include - -RsaKey rsa; -RNG rng; - -wc_InitRsaKey(&rsa, NULL); -wc_InitRng(&rng); - -// Encrypt -word32 outLen = wc_RsaPublicEncrypt(in, inLen, out, outSz, &rsa, &rng); - -// Decrypt -word32 
plainLen = wc_RsaPrivateDecrypt(cipher, cipherLen, plain, plainSz, &rsa); - -wc_FreeRsaKey(&rsa); -``` - -### 10.5.2 ECC - -```c -#include - -ecc_key key; -RNG rng; - -wc_ecc_init(&key); -wc_InitRng(&rng); -wc_ecc_make_key(&rng, 32, &key); // 256-bit key - -// Sign -byte sig[ECC_MAX_SIG_SIZE]; -word32 sigLen = sizeof(sig); -wc_ecc_sign_hash(hash, hashLen, sig, &sigLen, &rng, &key); - -// Verify -int verified = 0; -wc_ecc_verify_hash(sig, sigLen, hash, hashLen, &verified, &key); - -wc_ecc_free(&key); -``` - -### 10.5.3 Diffie-Hellman - -```c -#include - -DhKey dh; -byte priv[128], pub[128], agree[128]; -word32 privSz, pubSz, agreeSz; - -wc_InitDhKey(&dh); -wc_DhKeyDecode(dhParams, &idx, &dh, dhParamsSz); -wc_DhGenerateKeyPair(&dh, &rng, priv, &privSz, pub, &pubSz); -wc_DhAgree(&dh, agree, &agreeSz, priv, privSz, peerPub, peerPubSz); - -wc_FreeDhKey(&dh); -``` - ---- - -# 11. SSL Tutorial - -## 11.1 Quick Summary - -1. Initialize wolfSSL: `wolfSSL_Init()` -2. Create context: `wolfSSL_CTX_new(method)` -3. Load certificates and keys -4. Create SSL object: `wolfSSL_new(ctx)` -5. Associate socket: `wolfSSL_set_fd(ssl, sockfd)` -6. Handshake: `wolfSSL_connect()` or `wolfSSL_accept()` -7. Transfer data: `wolfSSL_read()`, `wolfSSL_write()` -8. 
Cleanup: `wolfSSL_free()`, `wolfSSL_CTX_free()`, `wolfSSL_Cleanup()` - -## 11.2 Client Example - -```c -#include - -int main() { - wolfSSL_Init(); - - WOLFSSL_CTX* ctx = wolfSSL_CTX_new(wolfTLSv1_3_client_method()); - wolfSSL_CTX_load_verify_locations(ctx, "ca-cert.pem", NULL); - - int sockfd = /* create and connect socket */; - - WOLFSSL* ssl = wolfSSL_new(ctx); - wolfSSL_set_fd(ssl, sockfd); - - if (wolfSSL_connect(ssl) != SSL_SUCCESS) { - // Handle error - } - - wolfSSL_write(ssl, "Hello", 5); - - char buffer[256]; - int ret = wolfSSL_read(ssl, buffer, sizeof(buffer)); - - wolfSSL_free(ssl); - wolfSSL_CTX_free(ctx); - wolfSSL_Cleanup(); - return 0; -} -``` - -## 11.3 Server Example - -```c -#include - -int main() { - wolfSSL_Init(); - - WOLFSSL_CTX* ctx = wolfSSL_CTX_new(wolfTLSv1_3_server_method()); - wolfSSL_CTX_use_certificate_file(ctx, "server-cert.pem", SSL_FILETYPE_PEM); - wolfSSL_CTX_use_PrivateKey_file(ctx, "server-key.pem", SSL_FILETYPE_PEM); - - int listenfd = /* create and bind socket */; - listen(listenfd, 5); - - int clientfd = accept(listenfd, NULL, NULL); - - WOLFSSL* ssl = wolfSSL_new(ctx); - wolfSSL_set_fd(ssl, clientfd); - - if (wolfSSL_accept(ssl) != SSL_SUCCESS) { - // Handle error - } - - char buffer[256]; - int ret = wolfSSL_read(ssl, buffer, sizeof(buffer)); - wolfSSL_write(ssl, buffer, ret); // Echo back - - wolfSSL_free(ssl); - close(clientfd); - wolfSSL_CTX_free(ctx); - wolfSSL_Cleanup(); - return 0; -} -``` - -## 11.4 Non-Blocking I/O - -```c -wolfSSL_set_using_nonblock(ssl, 1); - -int ret = wolfSSL_connect(ssl); -if (ret != SSL_SUCCESS) { - int err = wolfSSL_get_error(ssl, ret); - if (err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) { - // Retry later - } -} -``` - -## 11.5 Session Resumption - -```c -// After successful connection -WOLFSSL_SESSION* session = wolfSSL_get_session(ssl); - -// For new connection -WOLFSSL* ssl2 = wolfSSL_new(ctx); -wolfSSL_set_session(ssl2, session); -wolfSSL_connect(ssl2); -``` - ---- - -# 
Appendix A: SSL/TLS Overview - -## A.1 SSL/TLS Protocol Versions - -| Protocol | Status | -|----------|--------| -| SSL 2.0 | Deprecated, insecure | -| SSL 3.0 | Deprecated (POODLE vulnerability) | -| TLS 1.0 | Legacy, being phased out | -| TLS 1.1 | Legacy, being phased out | -| TLS 1.2 | Widely deployed, secure | -| TLS 1.3 | Current standard, recommended | -| DTLS 1.0 | Based on TLS 1.1 | -| DTLS 1.2 | Based on TLS 1.2 | -| DTLS 1.3 | Based on TLS 1.3 | - -## A.2 TLS Handshake Overview - -### TLS 1.2 Handshake - -1. **ClientHello** - Client sends supported versions, cipher suites, random -2. **ServerHello** - Server selects version, cipher suite, sends random -3. **Certificate** - Server sends certificate chain -4. **ServerKeyExchange** - Server sends key exchange parameters (if needed) -5. **CertificateRequest** - Server requests client certificate (optional) -6. **ServerHelloDone** - Server signals end of hello messages -7. **Certificate** - Client sends certificate (if requested) -8. **ClientKeyExchange** - Client sends key exchange data -9. **CertificateVerify** - Client proves certificate ownership -10. **ChangeCipherSpec** - Client switches to encrypted mode -11. **Finished** - Client sends encrypted verification -12. **ChangeCipherSpec** - Server switches to encrypted mode -13. **Finished** - Server sends encrypted verification - -### TLS 1.3 Handshake (Simplified) - -1. **ClientHello** - Client sends supported versions, cipher suites, key shares -2. **ServerHello** - Server selects parameters, sends key share -3. **EncryptedExtensions** - Server extensions (encrypted) -4. **Certificate** - Server certificate (encrypted) -5. **CertificateVerify** - Server signature (encrypted) -6. **Finished** - Server verification (encrypted) -7. **Certificate** - Client certificate (encrypted, if requested) -8. **CertificateVerify** - Client signature (encrypted) -9. 
**Finished** - Client verification (encrypted) - -TLS 1.3 achieves 1-RTT handshake (or 0-RTT with early data). - -## A.3 Certificate Chain - -``` -Root CA (self-signed, trusted) - └── Intermediate CA (signed by Root) - └── Server/Client Certificate (signed by Intermediate) -``` - ---- - -# Appendix B: wolfSSL API Quick Reference - -## B.1 Initialization and Cleanup - -| Function | Description | -|----------|-------------| -| `wolfSSL_Init()` | Initialize wolfSSL library | -| `wolfSSL_Cleanup()` | Cleanup wolfSSL library | - -## B.2 Context Functions - -| Function | Description | -|----------|-------------| -| `wolfSSL_CTX_new(method)` | Create new context | -| `wolfSSL_CTX_free(ctx)` | Free context | -| `wolfSSL_CTX_load_verify_locations(ctx, file, path)` | Load CA certificates | -| `wolfSSL_CTX_use_certificate_file(ctx, file, type)` | Load certificate | -| `wolfSSL_CTX_use_PrivateKey_file(ctx, file, type)` | Load private key | -| `wolfSSL_CTX_use_certificate_chain_file(ctx, file)` | Load certificate chain | -| `wolfSSL_CTX_set_verify(ctx, mode, cb)` | Set verification mode | -| `wolfSSL_CTX_set_cipher_list(ctx, list)` | Set cipher suites | - -## B.3 SSL Session Functions - -| Function | Description | -|----------|-------------| -| `wolfSSL_new(ctx)` | Create new SSL object | -| `wolfSSL_free(ssl)` | Free SSL object | -| `wolfSSL_set_fd(ssl, fd)` | Associate socket | -| `wolfSSL_connect(ssl)` | Client handshake | -| `wolfSSL_accept(ssl)` | Server handshake | -| `wolfSSL_read(ssl, buf, len)` | Read data | -| `wolfSSL_write(ssl, buf, len)` | Write data | -| `wolfSSL_shutdown(ssl)` | Shutdown connection | -| `wolfSSL_get_error(ssl, ret)` | Get error code | - -## B.4 Protocol Methods - -**Client Methods:** -- `wolfTLSv1_2_client_method()` -- `wolfTLSv1_3_client_method()` -- `wolfSSLv23_client_method()` (flexible) -- `wolfDTLSv1_2_client_method()` - -**Server Methods:** -- `wolfTLSv1_2_server_method()` -- `wolfTLSv1_3_server_method()` -- `wolfSSLv23_server_method()` 
(flexible) -- `wolfDTLSv1_2_server_method()` - -## B.5 Certificate Functions - -| Function | Description | -|----------|-------------| -| `wolfSSL_get_peer_certificate(ssl)` | Get peer certificate | -| `wolfSSL_X509_get_subject_name(x509)` | Get subject name | -| `wolfSSL_X509_get_issuer_name(x509)` | Get issuer name | -| `wolfSSL_X509_NAME_oneline(name, buf, len)` | Convert name to string | -| `wolfSSL_X509_free(x509)` | Free certificate | - ---- - -# Appendix C: Error Codes - -## C.1 Common SSL Errors - -| Code | Name | Description | -|------|------|-------------| -| -155 | `DOMAIN_NAME_MISMATCH` | Domain name verification failed | -| -188 | `ASN_NO_SIGNER_E` | No CA signer for certificate | -| -155 | `DATE_E` | Certificate date invalid | -| -173 | `VERIFY_CERT_ERROR` | Certificate verification failed | -| -308 | `SOCKET_ERROR_E` | Socket error | -| -323 | `WANT_READ` | Need more data (non-blocking) | -| -324 | `WANT_WRITE` | Need to write (non-blocking) | - -## C.2 wolfCrypt Errors - -| Code | Name | Description | -|------|------|-------------| -| -140 | `BAD_FUNC_ARG` | Bad function argument | -| -141 | `MEMORY_E` | Memory allocation failed | -| -143 | `BUFFER_E` | Buffer too small | -| -170 | `RSA_BUFFER_E` | RSA buffer error | -| -212 | `ECC_BAD_ARG_E` | ECC bad argument | - ---- - -# Appendix D: Build Options Reference - -## D.1 Common Configure Options - -| Option | Description | -|--------|-------------| -| `--enable-debug` | Enable debug mode | -| `--enable-opensslextra` | Enable OpenSSL compatibility | -| `--enable-opensslall` | Enable all OpenSSL compatibility | -| `--enable-sni` | Enable Server Name Indication | -| `--enable-alpn` | Enable ALPN | -| `--enable-dtls` | Enable DTLS | -| `--enable-tls13` | Enable TLS 1.3 | -| `--enable-ecc` | Enable ECC | -| `--enable-curve25519` | Enable Curve25519 | -| `--enable-ed25519` | Enable Ed25519 | -| `--enable-aesni` | Enable AES-NI | -| `--enable-intelasm` | Enable Intel assembly | -| `--enable-keygen` | Enable 
key generation | -| `--enable-certgen` | Enable certificate generation | -| `--enable-certreq` | Enable CSR generation | -| `--enable-pkcs7` | Enable PKCS#7 | -| `--enable-pkcs11` | Enable PKCS#11 | -| `--enable-staticmemory` | Enable static memory | -| `--enable-sniffer` | Enable SSL sniffer | - -## D.2 Common Preprocessor Macros - -| Macro | Description | -|-------|-------------| -| `DEBUG_WOLFSSL` | Enable debug output | -| `NO_FILESYSTEM` | Disable filesystem | -| `NO_INLINE` | Disable inline functions | -| `SINGLE_THREADED` | Single-threaded mode | -| `WOLFSSL_STATIC_MEMORY` | Static memory allocation | -| `HAVE_ECC` | Enable ECC | -| `HAVE_AESGCM` | Enable AES-GCM | -| `HAVE_CHACHA` | Enable ChaCha20 | -| `HAVE_POLY1305` | Enable Poly1305 | -| `WOLFSSL_TLS13` | Enable TLS 1.3 | -| `WOLFSSL_DTLS` | Enable DTLS | -| `OPENSSL_EXTRA` | OpenSSL compatibility | -| `WOLFSSL_SHA384` | Enable SHA-384 | -| `WOLFSSL_SHA512` | Enable SHA-512 | - ---- - -# Appendix E: Platform-Specific Notes - -## E.1 Embedded Systems - -For embedded systems with limited resources: - -```c -// user_settings.h -#define WOLFSSL_SMALL_STACK -#define SMALL_SESSION_CACHE -#define NO_FILESYSTEM -#define NO_WRITEV -#define NO_DEV_RANDOM -#define WOLFSSL_USER_IO -#define SINGLE_THREADED -``` - -## E.2 RTOS Integration - -**FreeRTOS:** -```c -#define FREERTOS -#define FREERTOS_TCP -``` - -**ThreadX:** -```c -#define THREADX -#define NETX -``` - -**Zephyr:** -```c -#define WOLFSSL_ZEPHYR -``` - -## E.3 Hardware Crypto Acceleration - -**STM32:** -```c -#define WOLFSSL_STM32_CUBEMX -#define STM32_CRYPTO -#define STM32_RNG -``` - -**ESP32:** -```c -#define WOLFSSL_ESPWROOM32 -``` - ---- - -# Appendix F: Comparison with Other Libraries - -| Feature | wolfSSL | OpenSSL | mbedTLS | -|---------|---------|---------|---------| -| Footprint | 20-100 KB | 500+ KB | 60-100 KB | -| TLS 1.3 | Yes | Yes | Yes | -| DTLS 1.3 | Yes | No | No | -| FIPS 140-2/3 | Yes | Yes | No | -| Hardware Accel | Extensive | 
Limited | Moderate | -| Commercial License | Yes | Apache 2.0 | Apache 2.0 | -| Embedded Focus | Primary | Secondary | Primary | - ---- - -# Appendix G: Post-Quantum Cryptography - -wolfSSL supports post-quantum cryptographic algorithms for protection against quantum computer attacks. - -## G.1 Supported Algorithms - -**Key Encapsulation:** -- ML-KEM (FIPS 203, formerly Kyber) - -**Digital Signatures:** -- ML-DSA (FIPS 204, formerly Dilithium) - -## G.2 Hybrid Mode - -wolfSSL supports hybrid key exchange combining classical (ECDHE) and post-quantum (ML-KEM) algorithms for defense-in-depth. - -## G.3 Enabling Post-Quantum Support - -```bash -./configure --with-liboqs -``` - -Or use wolfSSL's native implementations with appropriate configure options. - ---- - -# Appendix H: wolfSM (ShangMi) Support - -wolfSSL supports Chinese National Cryptographic Standards (ShangMi): - -| Algorithm | Description | -|-----------|-------------| -| SM2 | Elliptic curve cryptography | -| SM3 | Cryptographic hash function | -| SM4 | Block cipher | - -## H.1 Enabling ShangMi - -```bash -./configure --enable-sm2 --enable-sm3 --enable-sm4 -``` - -## H.2 Usage Example - -```c -#include -#include - -// SM3 Hash -wc_Sm3 sm3; -wc_InitSm3(&sm3, NULL, INVALID_DEVID); -wc_Sm3Update(&sm3, data, dataLen); -wc_Sm3Final(&sm3, hash); - -// SM4 Encryption -Sm4 sm4; -wc_Sm4SetKey(&sm4, key, SM4_KEY_SIZE); -wc_Sm4CbcEncrypt(&sm4, cipher, plain, plainLen); -``` - ---- - -# Appendix I: Resources and Support - -## I.1 Documentation - -- **Manual:** https://www.wolfssl.com/documentation/manuals/wolfssl/ -- **API Reference:** https://www.wolfssl.com/documentation/manuals/wolfssl/ -- **Examples:** https://github.com/wolfSSL/wolfssl-examples - -## I.2 Support - -- **Forums:** https://www.wolfssl.com/forums/ -- **Email:** support@wolfssl.com -- **GitHub Issues:** https://github.com/wolfSSL/wolfssl/issues - -## I.3 Licensing - -wolfSSL is dual-licensed: -- **GPLv2** - Free for open source projects -- 
**Commercial** - Available for proprietary applications - ---- - -*This documentation is derived from the wolfSSL Manual. For the most current and complete documentation, please visit https://www.wolfssl.com/documentation/* - diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index ce88955..d0f5192 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -4,6 +4,7 @@ ** xref:tutorials/echo-server.adoc[Echo Server] ** xref:tutorials/http-client.adoc[HTTP Client] ** xref:tutorials/dns-lookup.adoc[DNS Lookup] +** xref:tutorials/tls-context.adoc[TLS Context Configuration] * Guide ** xref:guide/tcp-networking.adoc[TCP/IP Networking] ** xref:guide/concurrent-programming.adoc[Concurrent Programming] diff --git a/doc/modules/ROOT/pages/guide/tls.adoc b/doc/modules/ROOT/pages/guide/tls.adoc index c440297..ee34d12 100644 --- a/doc/modules/ROOT/pages/guide/tls.adoc +++ b/doc/modules/ROOT/pages/guide/tls.adoc @@ -15,7 +15,7 @@ It wraps an existing stream (like `socket`) and provides encrypted I/O. NOTE: Code snippets assume: [source,cpp] ---- -#include +#include #include namespace corosio = boost::corosio; diff --git a/doc/modules/ROOT/pages/tutorials/http-client.adoc b/doc/modules/ROOT/pages/tutorials/http-client.adoc index c7aa28e..ccba894 100644 --- a/doc/modules/ROOT/pages/tutorials/http-client.adoc +++ b/doc/modules/ROOT/pages/tutorials/http-client.adoc @@ -217,7 +217,7 @@ To make HTTPS requests, wrap the socket in a `wolfssl_stream`: [source,cpp] ---- -#include +#include capy::task run_https_client( corosio::io_context& ioc, diff --git a/doc/modules/ROOT/pages/tutorials/tls-context.adoc b/doc/modules/ROOT/pages/tutorials/tls-context.adoc new file mode 100644 index 0000000..c559018 --- /dev/null +++ b/doc/modules/ROOT/pages/tutorials/tls-context.adoc @@ -0,0 +1,575 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie.falco@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + += TLS Context Configuration + +This tutorial covers how to configure TLS contexts for secure connections. +A `tls::context` stores certificates, keys, and settings that define how +TLS connections are established and verified. + +NOTE: Code snippets assume: +[source,cpp] +---- +#include + +namespace tls = boost::corosio::tls; +---- + +== Introduction + +The `tls::context` class is a portable abstraction for TLS configuration that +works across multiple TLS backends (WolfSSL, OpenSSL, mbedTLS, etc.). It +handles: + +* Loading certificates and private keys +* Configuring trust anchors for peer verification +* Setting protocol versions and cipher suites +* Configuring certificate verification behavior +* Managing revocation checking (CRL, OCSP) + +Use `tls::context` to configure TLS settings once, then pass it to TLS streams +for establishing secure connections. + +== Construction + +A `tls::context` is a shared handle to an opaque implementation. Copies share +the same underlying state, making it easy to pass contexts by value and share +them across multiple TLS streams. + +[source,cpp] +---- +// Create a default context +tls::context ctx; + +// Copy shares the same underlying state +tls::context ctx2 = ctx; // ctx and ctx2 share state + +// Move transfers ownership +tls::context ctx3 = std::move( ctx ); +// ctx is now empty +---- + +The default constructor creates a context ready for TLS 1.2 and TLS 1.3 +connections. No certificates or trust anchors are loaded initially—you +must configure these before use. + +=== Typical Setup Pattern + +Most applications follow this pattern: + +[source,cpp] +---- +tls::context ctx; + +// 1. 
Load credentials (for servers, or clients using client certs) +ctx.use_certificate_chain_file( "server.crt" ).value(); +ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); + +// 2. Configure trust anchors (for verifying peer certificates) +ctx.set_default_verify_paths().value(); // Use system CA store + +// 3. Set verification mode +ctx.set_verify_mode( tls::verify_mode::peer ).value(); + +// 4. Configure protocol options (optional) +ctx.set_min_protocol_version( tls::version::tls_1_2 ).value(); +---- + +== Credential Loading + +Credentials consist of a certificate (or certificate chain) and its +corresponding private key. Servers always need credentials; clients +need them only for mutual TLS (mTLS). + +=== Loading Certificate and Key Separately + +The most common approach loads the certificate chain and private key +from separate PEM files: + +[source,cpp] +---- +// Load certificate chain (leaf + intermediates) +ctx.use_certificate_chain_file( "fullchain.pem" ).value(); + +// Load the matching private key +ctx.use_private_key_file( "privkey.key", tls::file_format::pem ).value(); +---- + +For a single certificate without intermediates: + +[source,cpp] +---- +ctx.use_certificate_file( "server.crt", tls::file_format::pem ).value(); +ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); +---- + +=== Loading from PKCS#12 Bundles + +PKCS#12 (`.pfx` or `.p12` files) bundles certificate, key, and chain +into a single password-protected file: + +[source,cpp] +---- +ctx.use_pkcs12_file( "credentials.pfx", "bundle-password" ).value(); +---- + +=== Loading from Memory + +If credentials are stored in memory (e.g., from a database or secret +manager), use the non-file variants: + +[source,cpp] +---- +std::string cert_pem = fetch_certificate_from_vault(); +std::string key_pem = fetch_key_from_vault(); + +ctx.use_certificate_chain( cert_pem ).value(); +ctx.use_private_key( key_pem, tls::file_format::pem ).value(); +---- + +=== DER Format + 
+For binary DER-encoded files (common in embedded systems): + +[source,cpp] +---- +ctx.use_certificate_file( "server.der", tls::file_format::der ).value(); +ctx.use_private_key_file( "server.key.der", tls::file_format::der ).value(); +---- + +=== Encrypted Private Keys + +For password-protected private keys, set a password callback before +loading. See <> for details. + +== Trust Anchors + +Trust anchors are root CA certificates used to verify peer certificates. +Without trust anchors, certificate verification will fail. + +=== Using System Trust Store + +For HTTPS clients connecting to public servers, use the system's CA store: + +[source,cpp] +---- +ctx.set_default_verify_paths().value(); +---- + +This uses the operating system's trusted certificates: + +* Linux: `/etc/ssl/certs` or distribution-specific paths +* macOS: System Keychain +* Windows: Windows Certificate Store + +=== Custom CA Bundle + +For internal PKI or testing, load a custom CA bundle: + +[source,cpp] +---- +// Load CA bundle file (may contain multiple CAs) +ctx.load_verify_file( "/path/to/ca-bundle.crt" ).value(); +---- + +=== CA Directory + +On systems with hashed certificate directories (created by `c_rehash`): + +[source,cpp] +---- +ctx.add_verify_path( "/etc/ssl/certs" ).value(); +---- + +=== Individual CA Certificates + +Add CA certificates one at a time: + +[source,cpp] +---- +// From memory +std::string internal_ca = load_ca_from_config(); +ctx.add_certificate_authority( internal_ca ).value(); + +// Multiple CAs +ctx.add_certificate_authority( root_ca_pem ).value(); +ctx.add_certificate_authority( intermediate_ca_pem ).value(); +---- + +=== Combining Trust Sources + +You can combine multiple trust sources: + +[source,cpp] +---- +// Start with system trust store +ctx.set_default_verify_paths().value(); + +// Add an internal CA for corporate servers +ctx.add_certificate_authority( corporate_ca_pem ).value(); +---- + +== Protocol Configuration + +Control which TLS versions and cipher suites 
are allowed for connections. + +=== TLS Version Bounds + +Set minimum and/or maximum TLS versions: + +[source,cpp] +---- +// Require TLS 1.2 or newer (default) +ctx.set_min_protocol_version( tls::version::tls_1_2 ).value(); + +// Require TLS 1.3 only +ctx.set_min_protocol_version( tls::version::tls_1_3 ).value(); +ctx.set_max_protocol_version( tls::version::tls_1_3 ).value(); +---- + +=== Cipher Suites + +Configure allowed cipher suites using OpenSSL-style syntax: + +[source,cpp] +---- +// Strong cipher suites only +ctx.set_ciphersuites( "ECDHE+AESGCM:ECDHE+CHACHA20" ).value(); + +// Disable weak ciphers +ctx.set_ciphersuites( "HIGH:!aNULL:!MD5:!RC4" ).value(); +---- + +=== ALPN (Application-Layer Protocol Negotiation) + +ALPN negotiates the application protocol over TLS. Common uses: + +[source,cpp] +---- +// HTTP/2 with HTTP/1.1 fallback +ctx.set_alpn( { "h2", "http/1.1" } ).value(); + +// gRPC +ctx.set_alpn( { "h2" } ).value(); + +// Custom protocol +ctx.set_alpn( { "my-protocol/1.0" } ).value(); +---- + +The server selects from the client's list based on its own preferences. + +== Certificate Verification + +Configure how peer certificates are verified during the TLS handshake. 
+ +=== Verification Modes + +[source,cpp] +---- +// Don't verify peer (not recommended for production) +ctx.set_verify_mode( tls::verify_mode::none ).value(); + +// Verify peer if certificate is presented +ctx.set_verify_mode( tls::verify_mode::peer ).value(); + +// Require and verify peer certificate (mTLS server-side) +ctx.set_verify_mode( tls::verify_mode::require_peer ).value(); +---- + +Typical usage: + +* **Clients**: Use `peer` to verify server certificates +* **Servers without mTLS**: Use `none` +* **Servers with mTLS**: Use `require_peer` to require client certs + +=== Hostname Verification + +For clients, set the expected server hostname: + +[source,cpp] +---- +ctx.set_hostname( "api.example.com" ); +---- + +This enables: + +* **SNI (Server Name Indication)**: Tells the server which certificate to + present (required for virtual hosting) +* **Hostname matching**: Validates the certificate matches the expected host + +=== Chain Depth + +Limit how many intermediate certificates are allowed: + +[source,cpp] +---- +// Allow up to 3 intermediates (leaf -> 3 intermediates -> root) +ctx.set_verify_depth( 3 ).value(); +---- + +The default (typically 100) is sufficient for most chains. + +=== Custom Verification + +For advanced use cases, install a custom verification callback: + +[source,cpp] +---- +ctx.set_verify_callback( + []( bool preverified, auto& verify_ctx ) -> bool + { + // preverified: result of standard verification + // verify_ctx: access to certificate information + + if( !preverified ) + { + // Log or inspect the failure + return false; // reject + } + + // Additional custom checks... + return true; // accept + }).value(); +---- + +== Revocation Checking + +Certificate revocation checking verifies that certificates haven't been +invalidated by the issuing CA. Two mechanisms are supported: CRL and OCSP. 
+ +=== Revocation Policy + +Set the overall revocation checking behavior: + +[source,cpp] +---- +// Don't check revocation (default) +ctx.set_revocation_policy( tls::revocation_policy::disabled ); + +// Check but allow if status is unknown (lenient) +ctx.set_revocation_policy( tls::revocation_policy::soft_fail ); + +// Require successful revocation check (strict) +ctx.set_revocation_policy( tls::revocation_policy::hard_fail ); +---- + +=== CRL (Certificate Revocation Lists) + +Load CRLs from the CA that issued the certificates you're verifying: + +[source,cpp] +---- +// From file +ctx.add_crl_file( "/path/to/issuer.crl" ).value(); + +// From memory (e.g., fetched via HTTP) +std::string crl_data = fetch_crl_from_url( crl_url ); +ctx.add_crl( crl_data ).value(); +---- + +CRLs must be refreshed periodically as they expire. + +=== OCSP Stapling + +OCSP provides per-certificate revocation status without downloading +large CRLs. + +**Server-side** (provide pre-fetched OCSP response): + +[source,cpp] +---- +// Fetch OCSP response for your certificate +std::string ocsp_response = fetch_ocsp_response(); + +// Provide to clients during handshake +ctx.set_ocsp_staple( ocsp_response ).value(); +---- + +**Client-side** (require server to staple): + +[source,cpp] +---- +// Fail if server doesn't provide OCSP staple +ctx.set_require_ocsp_staple( true ); +---- + +=== Bootstrap vs. 
Hardened Connections + +A common pattern uses two context configurations: + +[source,cpp] +---- +// Bootstrap context: for fetching revocation data +tls::context bootstrap_ctx; +bootstrap_ctx.set_default_verify_paths().value(); +bootstrap_ctx.set_verify_mode( tls::verify_mode::peer ).value(); +bootstrap_ctx.set_revocation_policy( tls::revocation_policy::disabled ); + +// Hardened context: for sensitive connections +tls::context hardened_ctx; +hardened_ctx.set_default_verify_paths().value(); +hardened_ctx.set_verify_mode( tls::verify_mode::peer ).value(); +hardened_ctx.add_crl_file( "cached.crl" ).value(); +hardened_ctx.set_revocation_policy( tls::revocation_policy::hard_fail ); +---- + +== Password Handling + +Private keys and PKCS#12 files are often encrypted with a password. +Set a password callback before loading encrypted material. + +=== Password Callback + +[source,cpp] +---- +// Set callback before loading encrypted key +ctx.set_password_callback( + []( std::size_t max_length, tls::password_purpose purpose ) + { + // purpose: for_reading (decrypt) or for_writing (encrypt) + return std::string( "my-secret-password" ); + }); + +// Now load encrypted private key +ctx.use_private_key_file( "encrypted.key", tls::file_format::pem ).value(); +---- + +=== Secure Password Handling + +In production, don't hardcode passwords: + +[source,cpp] +---- +ctx.set_password_callback( + []( std::size_t max_length, tls::password_purpose purpose ) + { + // Read from environment + if( auto* pw = std::getenv( "TLS_KEY_PASSWORD" ) ) + return std::string( pw ); + + // Or prompt user + return prompt_user_for_password(); + }); +---- + +=== PKCS#12 Passwords + +PKCS#12 files take the password directly (no callback needed): + +[source,cpp] +---- +ctx.use_pkcs12_file( "credentials.pfx", "bundle-password" ).value(); +---- + +For PKCS#12 loaded from memory: + +[source,cpp] +---- +ctx.use_pkcs12( pkcs12_data, "bundle-password" ).value(); +---- + +== Complete Examples + +This section 
demonstrates complete, practical configurations for common +scenarios. + +=== HTTPS Client Context + +[source,cpp] +---- +tls::context make_https_client_context() +{ + tls::context ctx; + + // Trust system CAs for public websites + ctx.set_default_verify_paths().value(); + + // Verify server certificates + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Modern TLS only + ctx.set_min_protocol_version( tls::version::tls_1_2 ).value(); + + return ctx; +} + +// Usage with TLS stream +tls::context ctx = make_https_client_context(); +tls::stream secure( sock, ctx ); +co_await secure.handshake( tls::role::client ); +---- + +=== TLS Server Context + +[source,cpp] +---- +tls::context make_server_context() +{ + tls::context ctx; + + // Load server credentials + ctx.use_certificate_chain_file( "fullchain.pem" ).value(); + ctx.use_private_key_file( "privkey.pem", tls::file_format::pem ).value(); + + // Don't verify client certificates (no mTLS) + ctx.set_verify_mode( tls::verify_mode::none ).value(); + + return ctx; +} +---- + +=== Mutual TLS (mTLS) + +[source,cpp] +---- +tls::context make_mtls_client_context() +{ + tls::context ctx; + + // Client credentials for mTLS + ctx.use_certificate_chain_file( "client.crt" ).value(); + ctx.use_private_key_file( "client.key", tls::file_format::pem ).value(); + + // Trust specific CA for server verification + ctx.load_verify_file( "server-ca.crt" ).value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} +---- + +== Error Handling + +All fallible operations return `system::result`. 
Use `.value()` to +throw on error, or check explicitly: + +[source,cpp] +---- +// Throw on error +ctx.use_certificate_file( "cert.pem", tls::file_format::pem ).value(); + +// Check error explicitly +if( auto r = ctx.load_verify_file( "ca.crt" ); !r ) +{ + std::cerr << "Failed to load CA: " << r.error().message() << "\n"; + return; +} +---- + +Common errors include: + +* File not found or permission denied +* Invalid certificate or key format +* Key doesn't match certificate +* Wrong passphrase for encrypted key or PKCS#12 + +== Next Steps + +* xref:../guide/tls.adoc[TLS Encryption] — Using TLS streams +* xref:http-client.adoc[HTTP Client Tutorial] — HTTPS example diff --git a/example/https-client/https_client.cpp b/example/https-client/https_client.cpp index 6ad0eee..b64adc6 100644 --- a/example/https-client/https_client.cpp +++ b/example/https-client/https_client.cpp @@ -8,7 +8,7 @@ // #include -#include +#include #include #include #include diff --git a/example/tls_context_examples.cpp b/example/tls_context_examples.cpp new file mode 100644 index 0000000..ecf3575 --- /dev/null +++ b/example/tls_context_examples.cpp @@ -0,0 +1,486 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +// Examples demonstrating tls::context API usage patterns. +// These examples are not meant to compile; they validate API ergonomics. 
+ +#include + +namespace tls = boost::corosio::tls; + +//------------------------------------------------------------------------------ +// +// HTTPS Client Context +// +//------------------------------------------------------------------------------ + +// Basic HTTPS client that trusts system CAs +tls::context make_https_client() +{ + tls::context ctx; + + // Use system trust store for public websites + ctx.set_default_verify_paths().value(); + + // Verify the server certificate + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Set the hostname for SNI and certificate verification + ctx.set_hostname( "api.example.com" ); + + return ctx; +} + +// HTTPS client with pinned CA (don't trust system store) +tls::context make_pinned_ca_client( std::string_view ca_pem ) +{ + tls::context ctx; + + // Only trust this specific CA + ctx.add_certificate_authority( ca_pem ).value(); + + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + ctx.set_hostname( "internal.example.com" ); + + return ctx; +} + +// HTTP/2 client with ALPN +tls::context make_http2_client() +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Prefer HTTP/2, fall back to HTTP/1.1 + ctx.set_alpn( { "h2", "http/1.1" } ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// TLS Server Context +// +//------------------------------------------------------------------------------ + +// Basic TLS server (no client verification) +tls::context make_basic_server() +{ + tls::context ctx; + + // Load certificate chain and private key + ctx.use_certificate_chain_file( "server-fullchain.pem" ).value(); + ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); + + // Don't verify clients (no mTLS) + ctx.set_verify_mode( tls::verify_mode::none ).value(); + + return ctx; +} + +// mTLS server (requires client certificates) +tls::context 
make_mtls_server() +{ + tls::context ctx; + + // Server credentials + ctx.use_certificate_chain_file( "server-fullchain.pem" ).value(); + ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); + + // Trust this CA for client certificates + ctx.load_verify_file( "client-ca.crt" ).value(); + + // Require clients to present a valid certificate + ctx.set_verify_mode( tls::verify_mode::require_peer ).value(); + + return ctx; +} + +// Server with PKCS#12 credentials +tls::context make_server_from_pfx() +{ + tls::context ctx; + + // Load all credentials from a single file + ctx.use_pkcs12_file( "server.pfx", "bundle-password" ).value(); + + ctx.set_verify_mode( tls::verify_mode::none ).value(); + + return ctx; +} + +// Server with encrypted private key +tls::context make_server_encrypted_key() +{ + tls::context ctx; + + // Set password callback before loading encrypted key + ctx.set_password_callback( + []( std::size_t max_len, tls::password_purpose purpose ) + { + // Read from environment or secret manager (std::getenv returns nullptr when unset) + char const* pw = std::getenv( "TLS_KEY_PASSWORD" ); return std::string( pw ? pw : "" ); + }); + + ctx.use_certificate_chain_file( "server.crt" ).value(); + ctx.use_private_key_file( "server-encrypted.key", tls::file_format::pem ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// mTLS Client Context +// +//------------------------------------------------------------------------------ + +// Client with client certificate for mTLS +tls::context make_mtls_client() +{ + tls::context ctx; + + // Client credentials for mTLS + ctx.use_certificate_file( "client.crt", tls::file_format::pem ).value(); + ctx.use_private_key_file( "client.key", tls::file_format::pem ).value(); + + // Trust specific CA for server verification + ctx.load_verify_file( "server-ca.crt" ).value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + 
+//------------------------------------------------------------------------------ +// +// Protocol Version Configuration +// +//------------------------------------------------------------------------------ + +// TLS 1.3 only +tls::context make_tls13_only() +{ + tls::context ctx; + + ctx.set_min_protocol_version( tls::version::tls_1_3 ).value(); + ctx.set_max_protocol_version( tls::version::tls_1_3 ).value(); + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +// Allow TLS 1.2+ (default behavior made explicit) +tls::context make_tls12_plus() +{ + tls::context ctx; + + ctx.set_min_protocol_version( tls::version::tls_1_2 ).value(); + // No max = allow newest + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// Cipher Suite Configuration +// +//------------------------------------------------------------------------------ + +// High-security cipher configuration +tls::context make_high_security() +{ + tls::context ctx; + + // Only ECDHE key exchange with AESGCM or ChaCha20 + ctx.set_ciphersuites( "ECDHE+AESGCM:ECDHE+CHACHA20" ).value(); + + // TLS 1.3 only + ctx.set_min_protocol_version( tls::version::tls_1_3 ).value(); + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// Revocation Checking +// +//------------------------------------------------------------------------------ + +// Client with CRL checking +tls::context make_client_with_crl( std::string_view crl_path ) +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.add_crl_file( crl_path ).value(); + + // Fail if certificate is revoked, allow if status unknown + ctx.set_revocation_policy( 
tls::revocation_policy::soft_fail ); + + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +// Client requiring OCSP stapling +tls::context make_client_require_ocsp() +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Require server to provide OCSP staple + ctx.set_require_ocsp_staple( true ); + + return ctx; +} + +// Server with OCSP stapling +tls::context make_server_with_ocsp( std::string_view ocsp_response ) +{ + tls::context ctx; + + ctx.use_certificate_chain_file( "server.crt" ).value(); + ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); + + // Provide pre-fetched OCSP response to clients + ctx.set_ocsp_staple( ocsp_response ).value(); + + return ctx; +} + +// Strict revocation checking +tls::context make_hardened_client() +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Fail if revocation status cannot be determined + ctx.set_revocation_policy( tls::revocation_policy::hard_fail ); + + // Require OCSP staple from server + ctx.set_require_ocsp_staple( true ); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// Custom Verification +// +//------------------------------------------------------------------------------ + +// Client with custom verification callback +tls::context make_client_custom_verify() +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + ctx.set_verify_callback( + []( bool preverified, auto& verify_ctx ) -> bool + { + if( !preverified ) + { + // Standard verification failed - could log here + return false; + } + + // Additional custom checks could go here: + // - Check specific certificate fields + // - Verify against a pinned certificate + // - Log certificate details + + return true; + }).value(); + + 
return ctx; +} + +// Verify depth limit +tls::context make_client_limited_depth() +{ + tls::context ctx; + + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Allow at most 2 intermediate certificates + ctx.set_verify_depth( 2 ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// Loading from Memory +// +//------------------------------------------------------------------------------ + +// Load all credentials from memory buffers +tls::context make_from_memory( + std::string_view cert_pem, + std::string_view key_pem, + std::string_view ca_pem ) +{ + tls::context ctx; + + // From vault/secret manager + ctx.use_certificate_chain( cert_pem ).value(); + ctx.use_private_key( key_pem, tls::file_format::pem ).value(); + ctx.add_certificate_authority( ca_pem ).value(); + + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +// Load PKCS#12 from memory +tls::context make_from_pkcs12_memory( + std::string_view pkcs12_data, + std::string_view passphrase ) +{ + tls::context ctx; + + ctx.use_pkcs12( pkcs12_data, passphrase ).value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// DER Format +// +//------------------------------------------------------------------------------ + +// Load DER-encoded certificate and key +tls::context make_from_der() +{ + tls::context ctx; + + ctx.use_certificate_file( "server.der", tls::file_format::der ).value(); + ctx.use_private_key_file( "server.key.der", tls::file_format::der ).value(); + + return ctx; +} + +//------------------------------------------------------------------------------ +// +// Shared Context +// +//------------------------------------------------------------------------------ + +// Demonstrate shared ownership +void demonstrate_sharing() +{ + // Create a 
context + tls::context original; + original.set_default_verify_paths().value(); + original.set_verify_mode( tls::verify_mode::peer ).value(); + + // Share via copy - both point to same underlying state + tls::context copy1 = original; + tls::context copy2 = original; + + // Changes to copy1 affect copy2 and original + // (they all share the same impl) + + // Move transfers ownership + tls::context moved = std::move( original ); + // original is now empty +} + +//------------------------------------------------------------------------------ +// +// Error Handling Patterns +// +//------------------------------------------------------------------------------ + +// Throw on error (simple code, let exceptions propagate) +void load_throwing() +{ + tls::context ctx; + + ctx.use_certificate_chain_file( "cert.pem" ).value(); // throws on error + ctx.use_private_key_file( "key.pem", tls::file_format::pem ).value(); + ctx.set_default_verify_paths().value(); +} + +// Check errors explicitly +bool load_checked( tls::context& ctx, std::string& error_msg ) +{ + if( auto r = ctx.use_certificate_chain_file( "cert.pem" ); !r ) + { + error_msg = "Certificate: " + std::string( r.error().message() ); + return false; + } + + if( auto r = ctx.use_private_key_file( "key.pem", tls::file_format::pem ); !r ) + { + error_msg = "Key: " + std::string( r.error().message() ); + return false; + } + + if( auto r = ctx.set_default_verify_paths(); !r ) + { + error_msg = "CA store: " + std::string( r.error().message() ); + return false; + } + + return true; +} + +// Mixed approach - throw for programmer errors, check for runtime errors +void load_mixed() +{ + tls::context ctx; + + // File loading might fail at runtime + if( auto r = ctx.use_certificate_chain_file( "cert.pem" ); !r ) + { + // Handle missing file gracefully + std::cerr << "Certificate not found: " << r.error().message() << "\n"; + return; + } + + // Protocol settings won't fail if arguments are valid + ctx.set_min_protocol_version( 
tls::version::tls_1_2 ).value(); + ctx.set_verify_mode( tls::verify_mode::peer ).value(); +} + +//------------------------------------------------------------------------------ +// +// Main (not compiled, just for documentation) +// +//------------------------------------------------------------------------------ + +int main() +{ + // These examples demonstrate API ergonomics + + auto https = make_https_client(); + auto server = make_basic_server(); + auto mtls = make_mtls_server(); + + return 0; +} diff --git a/include/boost/corosio/any_bufref.hpp b/include/boost/corosio/any_bufref.hpp deleted file mode 100644 index 57d4c14..0000000 --- a/include/boost/corosio/any_bufref.hpp +++ /dev/null @@ -1,129 +0,0 @@ -// -// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// -// Official repository: https://github.com/cppalliance/corosio -// - -#ifndef BOOST_COROSIO_ANY_BUFREF_HPP -#define BOOST_COROSIO_ANY_BUFREF_HPP - -#include -#include - -#include - -namespace boost { -namespace corosio { - -/** A type-erased buffer sequence I/O parameter. - - This class provides a type-erased interface for iterating - over buffer sequences without knowing the concrete type. - It allows asynchronous operations to efficiently type-erase - the buffer sequence parameter, avoiding the need to - templatize the implementation. - - @par Example - The following shows the minimal form of an awaitable, templated on the - buffer sequence type, with only an `await_suspend` method. The example - demonstrates that you can construct an `any_bufref` in the parameter - list when calling a virtual interface; there is no need to create a - separate variable if not desired. 
- - @code - template - struct awaitable - { - Buffers b; - - void await_suspend(std::coroutine_handle<>) - { - my_virtual_engine_submit(any_bufref(b)); - } - }; - - // Example virtual interface accepting any_bufref - void my_virtual_engine_submit(any_bufref p) - { - capy::mutable_buffer temp[8]; - std::size_t n = p.copy_to(temp, 8); - // ... handle the buffers ... - } - @endcode -*/ -class any_bufref -{ -public: - /** Construct from a const buffer sequence. - - @param bs The buffer sequence to adapt. - */ - template - explicit - any_bufref(BS const& bs) noexcept - : bs_(&bs) - , fn_(©_impl) - { - } - - /** Fill an array with buffers from the sequence. - - @param dest Pointer to array of mutable buffer descriptors. - @param n Maximum number of buffers to copy. - - @return The number of buffers actually copied. - */ - std::size_t - copy_to( - capy::mutable_buffer* dest, - std::size_t n) const noexcept - { - return fn_(bs_, dest, n); - } - -private: - template - static std::size_t - copy_impl( - void const* p, - capy::mutable_buffer* dest, - std::size_t n) - { - auto const& bs = *static_cast(p); - auto it = capy::begin(bs); - auto const end_it = capy::end(bs); - - std::size_t i = 0; - if constexpr (capy::MutableBufferSequence) - { - for (; it != end_it && i < n; ++it, ++i) - dest[i] = *it; - } - else - { - for (; it != end_it && i < n; ++it, ++i) - { - auto const& buf = *it; - dest[i] = capy::mutable_buffer( - const_cast( - static_cast(buf.data())), - buf.size()); - } - } - return i; - } - - using fn_t = std::size_t(*)(void const*, - capy::mutable_buffer*, std::size_t); - - void const* bs_; - fn_t fn_; -}; - -} // namespace corosio -} // namespace boost - -#endif diff --git a/include/boost/corosio/io_result.hpp b/include/boost/corosio/io_result.hpp index 0cca1c8..360a1f0 100644 --- a/include/boost/corosio/io_result.hpp +++ b/include/boost/corosio/io_result.hpp @@ -63,15 +63,6 @@ struct io_result<> /** The error code from the operation. 
*/ system::error_code ec; - /** Check if the operation succeeded. - - @return `true` if no error occurred. - */ - explicit operator bool() const noexcept - { - return !ec; - } - /** Throw if an error occurred. @throws system::system_error if `ec` is set. @@ -113,15 +104,6 @@ struct io_result /** The number of bytes transferred. */ std::size_t n = 0; - /** Check if the operation succeeded. - - @return `true` if no error occurred. - */ - explicit operator bool() const noexcept - { - return !ec; - } - /** Get bytes transferred, throwing on error. @return The number of bytes transferred. @@ -158,15 +140,6 @@ struct io_result /** The result value. */ T value_ = {}; - /** Check if the operation succeeded. - - @return `true` if no error occurred. - */ - explicit operator bool() const noexcept - { - return !ec; - } - /** Get the value, throwing on error. @return The result value. @@ -205,15 +178,6 @@ struct io_result /** The result values. */ std::tuple values; - /** Check if the operation succeeded. - - @return `true` if no error occurred. - */ - explicit operator bool() const noexcept - { - return !ec; - } - /** Get the values, throwing on error. @return The result values as a tuple. 
diff --git a/include/boost/corosio/io_stream.hpp b/include/boost/corosio/io_stream.hpp index 5f669c6..7812106 100644 --- a/include/boost/corosio/io_stream.hpp +++ b/include/boost/corosio/io_stream.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -127,7 +127,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object std::coroutine_handle<> h, Ex const& ex) -> std::coroutine_handle<> { - any_bufref param(buffers_); + capy::any_bufref param(buffers_); ios_.get().read_some(h, ex, param, token_, &ec_, &bytes_transferred_); return std::noop_coroutine(); } @@ -139,7 +139,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object std::stop_token token) -> std::coroutine_handle<> { token_ = std::move(token); - any_bufref param(buffers_); + capy::any_bufref param(buffers_); ios_.get().read_some(h, ex, param, token_, &ec_, &bytes_transferred_); return std::noop_coroutine(); } @@ -179,7 +179,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object std::coroutine_handle<> h, Ex const& ex) -> std::coroutine_handle<> { - any_bufref param(buffers_); + capy::any_bufref param(buffers_); ios_.get().write_some(h, ex, param, token_, &ec_, &bytes_transferred_); return std::noop_coroutine(); } @@ -191,7 +191,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object std::stop_token token) -> std::coroutine_handle<> { token_ = std::move(token); - any_bufref param(buffers_); + capy::any_bufref param(buffers_); ios_.get().write_some(h, ex, param, token_, &ec_, &bytes_transferred_); return std::noop_coroutine(); } @@ -203,7 +203,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object virtual void read_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, system::error_code*, std::size_t*) = 0; @@ -211,7 +211,7 @@ class BOOST_COROSIO_DECL io_stream : public io_object virtual void write_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, 
system::error_code*, std::size_t*) = 0; diff --git a/include/boost/corosio/read.hpp b/include/boost/corosio/read.hpp index 6c656d9..a19447d 100644 --- a/include/boost/corosio/read.hpp +++ b/include/boost/corosio/read.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/boost/corosio/socket.hpp b/include/boost/corosio/socket.hpp index 4c30479..c401452 100644 --- a/include/boost/corosio/socket.hpp +++ b/include/boost/corosio/socket.hpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/boost/corosio/tcp_server.hpp b/include/boost/corosio/tcp_server.hpp index c970f32..ceb38d5 100644 --- a/include/boost/corosio/tcp_server.hpp +++ b/include/boost/corosio/tcp_server.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -38,21 +38,42 @@ namespace corosio { class BOOST_COROSIO_DECL tcp_server { -protected: +public: class worker_base; class launcher; class workers; private: struct waiter; - class push_aw; - class pop_aw; + io_context& ctx_; + capy::any_executor ex_; + waiter* waiters_ = nullptr; + std::vector ports_; + + template struct launch_wrapper { struct promise_type { - capy::any_executor_ref d; + Ex ex; // Stored directly in frame, no allocation + + // For regular coroutines: first arg is the executor + template + requires capy::Executor> + promise_type(E e, Args&&...) + : ex(std::move(e)) + { + } + + // For lambda coroutines: first arg is lambda closure, second is executor + template + requires (!capy::Executor> && + capy::Executor>) + promise_type(Closure&&, E e, Args&&...) 
+ : ex(std::move(e)) + { + } launch_wrapper get_return_object() noexcept { return {std::coroutine_handle::from_promise(*this)}; @@ -69,21 +90,18 @@ class BOOST_COROSIO_DECL struct adapter { std::decay_t aw; - capy::any_executor_ref d; + Ex* ex_ptr; bool await_ready() { return aw.await_ready(); } auto await_resume() { return aw.await_resume(); } auto await_suspend(std::coroutine_handle h) { - if constexpr (capy::IoAwaitable< - std::decay_t, capy::any_executor_ref>) - return aw.await_suspend(h, d, std::stop_token{}); - else - return aw.await_suspend(h); + static_assert(capy::IoAwaitable, Ex>); + return aw.await_suspend(h, *ex_ptr, std::stop_token{}); } }; - return adapter{std::forward(a), d}; + return adapter{std::forward(a), &ex}; } }; @@ -110,12 +128,6 @@ class BOOST_COROSIO_DECL launch_wrapper& operator=(launch_wrapper&&) = delete; }; - io_context& ctx_; - capy::any_executor_ref dispatch_; - capy::any_executor_ref post_; - waiter* waiters_ = nullptr; - std::vector ports_; - struct waiter { waiter* next; @@ -137,15 +149,14 @@ class BOOST_COROSIO_DECL await_suspend(std::coroutine_handle<> h, Ex const&, std::stop_token) noexcept { // Dispatch to server's executor before touching shared state - return self_.dispatch_.dispatch(h); + return self_.ex_.dispatch(h); } void await_resume() noexcept; - }; - - push_aw push(worker_base& w); - void push_sync(worker_base& w) noexcept; + private: + std::coroutine_handle<> await_suspend_impl(std::coroutine_handle<> h) noexcept; + }; class BOOST_COROSIO_DECL pop_aw { @@ -168,14 +179,22 @@ class BOOST_COROSIO_DECL } system::result await_resume() noexcept; + + private: + bool await_suspend_impl(std::coroutine_handle<> h) noexcept; }; + push_aw push(worker_base& w); + + void push_sync(worker_base& w) noexcept; + pop_aw pop(); capy::task do_accept(acceptor& acc); -protected: - class BOOST_COROSIO_DECL worker_base +public: + class BOOST_COROSIO_DECL + worker_base { worker_base* next = nullptr; @@ -183,19 +202,13 @@ class 
BOOST_COROSIO_DECL friend class workers; public: - socket sock; - virtual ~worker_base() = default; virtual void run(launcher launch) = 0; - - protected: - worker_base(capy::execution_context& ctx) - : sock(ctx) - { - } + virtual corosio::socket& socket() = 0; }; - class BOOST_COROSIO_DECL workers + class BOOST_COROSIO_DECL + workers { friend class tcp_server; @@ -238,8 +251,6 @@ class BOOST_COROSIO_DECL std::size_t size() const noexcept { return v_.size(); } }; - workers wv_; - class BOOST_COROSIO_DECL launcher { tcp_server* srv_; @@ -285,27 +296,29 @@ class BOOST_COROSIO_DECL } guard{srv_, w}; auto wrapper = - [](Executor ex, tcp_server* self, capy::task t, worker_base* wp) - -> launch_wrapper - { - (void)ex; // Prevent executor destruction while coroutine runs - co_await std::move(t); - co_await self->push(*wp); - }(ex, srv_, std::move(task), w); + [](Executor ex, tcp_server* self, capy::task t, worker_base* wp) + -> launch_wrapper + { + (void)ex; // Executor stored in promise via constructor + co_await std::move(t); + co_await self->push(*wp); + }(ex, srv_, std::move(task), w); + // Executor is now stored in promise via constructor ex.post(std::exchange(wrapper.h, nullptr)); // Release before post guard.w = nullptr; // Success - dismiss guard } }; protected: + workers wv_; // API for derived + template tcp_server( io_context& ctx, Ex const& ex) : ctx_(ctx) - , dispatch_(ex) - , post_(ex) + , ex_(ex) { } diff --git a/include/boost/corosio/tls/context.hpp b/include/boost/corosio/tls/context.hpp new file mode 100644 index 0000000..2a1591e --- /dev/null +++ b/include/boost/corosio/tls/context.hpp @@ -0,0 +1,867 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef BOOST_COROSIO_TLS_CONTEXT_HPP +#define BOOST_COROSIO_TLS_CONTEXT_HPP + +#include +#include + +#include +#include +#include + +namespace boost { +namespace corosio { +namespace tls { + +//------------------------------------------------------------------------------ +// +// Enumerations +// +//------------------------------------------------------------------------------ + +/** TLS handshake role. + + Specifies whether to perform the TLS handshake as a client or server. + + @see stream::handshake +*/ +enum class role +{ + /// Perform handshake as the connecting client. + client, + + /// Perform handshake as the accepting server. + server +}; + +/** TLS protocol version. + + Specifies the minimum or maximum TLS protocol version to use + for connections. Only modern, secure versions are supported. + + @see context::set_min_protocol_version + @see context::set_max_protocol_version +*/ +enum class version +{ + /// TLS 1.2 (RFC 5246). + tls_1_2, + + /// TLS 1.3 (RFC 8446). + tls_1_3 +}; + +/** Certificate and key file format. + + Specifies the encoding format for certificate and key data. + + @see context::use_certificate + @see context::use_private_key +*/ +enum class file_format +{ + /// PEM format (Base64-encoded with header/footer lines). + pem, + + /// DER format (raw ASN.1 binary encoding). + der +}; + +/** Peer certificate verification mode. + + Controls how the TLS implementation verifies the peer's + certificate during the handshake. + + @see context::set_verify_mode +*/ +enum class verify_mode +{ + /// Do not request or verify the peer certificate. + none, + + /// Request and verify the peer certificate if presented. + peer, + + /// Require and verify the peer certificate (fail if not presented). + require_peer +}; + +/** Certificate revocation checking policy. 
+ + Controls how certificate revocation status is checked during + verification. + + @see context::set_revocation_policy +*/ +enum class revocation_policy +{ + /// Do not check revocation status. + disabled, + + /// Check revocation but allow connection if status is unknown. + soft_fail, + + /// Require successful revocation check (fail if status is unknown). + hard_fail +}; + +/** Purpose for password callback invocation. + + Indicates whether the password is needed for reading (decrypting) + or writing (encrypting) key material. + + @see context::set_password_callback +*/ +enum class password_purpose +{ + /// Password needed to decrypt/read protected key material. + for_reading, + + /// Password needed to encrypt/write protected key material. + for_writing +}; + +class context; + +namespace detail { +struct context_data; +context_data const& +get_context_data( context const& ) noexcept; +} // namespace detail + +/** A portable TLS context for certificate and settings storage. + + The `context` class provides a backend-agnostic interface for + configuring TLS connections. It stores credentials (certificates and + private keys), trust anchors, protocol settings, and verification + options that are used when establishing TLS connections. + + This class is a shared handle to an opaque implementation. Copies + share the same underlying state. This allows contexts to be passed + by value and shared across multiple TLS streams. + + This class abstracts the configuration phase of TLS across multiple + backend implementations (OpenSSL, WolfSSL, mbedTLS, Schannel, etc.), + allowing portable code that works regardless of which TLS library + is linked. + + @par Modification After Stream Creation + + Modifying a context after a TLS stream has been created from it + results in undefined behavior. The context's configuration is + captured when the first stream is constructed, and subsequent + modifications are not reflected in existing or new streams + sharing the context. 
+ + If different configurations are needed, create separate context + objects. + + @par Thread Safety + + Distinct objects: Safe. + + Shared objects: Unsafe. A context must not be modified while + any thread is creating streams from it. + + @par Example + @code + // Create a client context with system trust anchors + corosio::tls::context ctx; + ctx.set_default_verify_paths().value(); + ctx.set_verify_mode( corosio::tls::verify_mode::peer ).value(); + ctx.set_hostname( "example.com" ); + + // Use with a TLS stream + corosio::tls::stream secure( sock, ctx ); + co_await secure.handshake( corosio::tls::role::client ); + @endcode + + @see role +*/ +class BOOST_COROSIO_DECL context +{ + struct impl; + std::shared_ptr impl_; + + friend + detail::context_data const& + detail::get_context_data( context const& ) noexcept; + +public: + /** Construct a default TLS context. + + Creates a context with default settings suitable for TLS 1.2 + and TLS 1.3 connections. No certificates or trust anchors are + loaded; call the appropriate methods to configure credentials + and verification. + + @par Example + @code + corosio::tls::context ctx; + @endcode + */ + context(); + + /** Copy constructor. + + Creates a new handle that shares ownership of the underlying + TLS context state with `other`. + + @param other The context to copy from. + */ + context( context const& other ) = default; + + /** Copy assignment operator. + + Releases the current context's shared ownership and acquires + shared ownership of `other`'s underlying state. + + @param other The context to copy from. + + @return Reference to this context. + */ + context& operator=( context const& other ) = default; + + /** Move constructor. + + Transfers ownership of the TLS context from another instance. + After the move, `other` is in a valid but empty state. + + @param other The context to move from. + */ + context( context&& other ) noexcept = default; + + /** Move assignment operator. 
+ + Releases the current context's shared ownership and transfers + ownership from another instance. After the move, `other` is + in a valid but empty state. + + @param other The context to move from. + + @return Reference to this context. + */ + context& operator=( context&& other ) noexcept = default; + + /** Destructor. + + Releases this handle's shared ownership of the underlying + context. The context state is destroyed when the last handle + is released. + */ + ~context() = default; + + //-------------------------------------------------------------------------- + // + // Credential Loading + // + //-------------------------------------------------------------------------- + + /** Load the entity certificate from a memory buffer. + + Sets the certificate that identifies this endpoint to the peer. + For servers, this is the server certificate. For clients using + mutual TLS, this is the client certificate. + + The certificate must match the private key loaded via + `use_private_key()` or `use_private_key_file()`. + + @param certificate The certificate data. + + @param format The encoding format of the certificate data. + + @return Success, or an error if the certificate could not be parsed + or is invalid. + + @see use_certificate_file + @see use_private_key + */ + system::result + use_certificate( + std::string_view certificate, + file_format format ); + + /** Load the entity certificate from a file. + + Sets the certificate that identifies this endpoint to the peer. + For servers, this is the server certificate. For clients using + mutual TLS, this is the client certificate. + + @param filename Path to the certificate file. + + @param format The encoding format of the file. + + @return Success, or an error if the file could not be read or the + certificate is invalid. 
+ + @par Example + @code + ctx.use_certificate_file( "server.crt", tls::file_format::pem ).value(); + @endcode + + @see use_certificate + @see use_private_key_file + */ + system::result + use_certificate_file( + std::string_view filename, + file_format format ); + + /** Load a certificate chain from a memory buffer. + + Loads the entity certificate followed by intermediate CA certificates. + The chain should be ordered from leaf to root (excluding the root). + This is the typical format for PEM certificate bundles. + + @param chain The certificate chain data in PEM format (concatenated + certificates). + + @return Success, or an error if the chain could not be parsed. + + @see use_certificate_chain_file + */ + system::result + use_certificate_chain( std::string_view chain ); + + /** Load a certificate chain from a file. + + Loads the entity certificate followed by intermediate CA certificates + from a PEM file. The file should contain concatenated PEM certificates + ordered from leaf to root (excluding the root). + + @param filename Path to the certificate chain file. + + @return Success, or an error if the file could not be read or parsed. + + @par Example + @code + // Load certificate chain (cert + intermediates) + ctx.use_certificate_chain_file( "fullchain.pem" ).value(); + @endcode + + @see use_certificate_chain + */ + system::result + use_certificate_chain_file( std::string_view filename ); + + /** Load the private key from a memory buffer. + + Sets the private key corresponding to the entity certificate. + The key must match the certificate loaded via `use_certificate()` + or `use_certificate_chain()`. + + If the key is encrypted, set a password callback via + `set_password_callback()` before calling this function. + + @param private_key The private key data. + + @param format The encoding format of the key data. + + @return Success, or an error if the key could not be parsed, + is encrypted without a password callback, or doesn't match + the certificate. 
+ + @see use_private_key_file + @see set_password_callback + */ + system::result + use_private_key( + std::string_view private_key, + file_format format ); + + /** Load the private key from a file. + + Sets the private key corresponding to the entity certificate. + The key must match the certificate loaded via `use_certificate_file()` + or `use_certificate_chain_file()`. + + If the key file is encrypted, set a password callback via + `set_password_callback()` before calling this function. + + @param filename Path to the private key file. + + @param format The encoding format of the file. + + @return Success, or an error if the file could not be read, + the key is invalid, or it doesn't match the certificate. + + @par Example + @code + ctx.use_private_key_file( "server.key", tls::file_format::pem ).value(); + @endcode + + @see use_private_key + @see set_password_callback + */ + system::result + use_private_key_file( + std::string_view filename, + file_format format ); + + /** Load credentials from a PKCS#12 bundle in memory. + + PKCS#12 (also known as PFX) is a binary format that bundles a + certificate, private key, and optionally intermediate certificates + into a single password-protected file. + + @param data The PKCS#12 bundle data. + + @param passphrase The password protecting the bundle. + + @return Success, or an error if the bundle could not be parsed + or the passphrase is incorrect. + + @see use_pkcs12_file + */ + system::result + use_pkcs12( + std::string_view data, + std::string_view passphrase ); + + /** Load credentials from a PKCS#12 file. + + PKCS#12 (also known as PFX) is a binary format that bundles a + certificate, private key, and optionally intermediate certificates + into a single password-protected file. This is common on Windows + and for certificates exported from browsers. + + @param filename Path to the PKCS#12 file. + + @param passphrase The password protecting the file. 
+ + @return Success, or an error if the file could not be read, + parsed, or the passphrase is incorrect. + + @par Example + @code + ctx.use_pkcs12_file( "credentials.pfx", "secret" ).value(); + @endcode + + @see use_pkcs12 + */ + system::result + use_pkcs12_file( + std::string_view filename, + std::string_view passphrase ); + + //-------------------------------------------------------------------------- + // + // Trust Anchors + // + //-------------------------------------------------------------------------- + + /** Add a certificate authority for peer verification. + + Adds a single CA certificate to the trust store used for verifying + peer certificates. Call this multiple times to add multiple CAs, + or use `load_verify_file()` for a bundle. + + @param ca The CA certificate data in PEM format. + + @return Success, or an error if the certificate could not be parsed. + + @see load_verify_file + @see set_default_verify_paths + */ + system::result + add_certificate_authority( std::string_view ca ); + + /** Load CA certificates from a file. + + Loads one or more CA certificates from a PEM file. The file may + contain multiple concatenated PEM certificates. + + @param filename Path to a PEM file containing CA certificates. + + @return Success, or an error if the file could not be read or parsed. + + @par Example + @code + // Load a custom CA bundle + ctx.load_verify_file( "/etc/ssl/certs/ca-certificates.crt" ).value(); + @endcode + + @see add_certificate_authority + @see add_verify_path + */ + system::result + load_verify_file( std::string_view filename ); + + /** Add a directory of CA certificates for verification. + + Adds a directory containing CA certificate files. Each file must + contain a single certificate in PEM format, named using the + subject name hash (as generated by `openssl rehash` or + `c_rehash`). + + @param path Path to the directory containing hashed CA certificates. + + @return Success, or an error if the directory is invalid. 
+ + @par Example + @code + ctx.add_verify_path( "/etc/ssl/certs" ).value(); + @endcode + + @see load_verify_file + @see set_default_verify_paths + */ + system::result + add_verify_path( std::string_view path ); + + /** Use the system default CA certificate store. + + Configures the context to use the operating system's default + trust store for peer certificate verification. This is the + recommended approach for HTTPS clients connecting to public + servers. + + On different platforms this uses: + - Linux: `/etc/ssl/certs` or distribution-specific paths + - macOS: System Keychain + - Windows: Windows Certificate Store + + @return Success, or an error if the system store could not be loaded. + + @par Example + @code + // Trust the same CAs as the system + ctx.set_default_verify_paths().value(); + @endcode + + @see load_verify_file + @see add_verify_path + */ + system::result + set_default_verify_paths(); + + //-------------------------------------------------------------------------- + // + // Protocol Configuration + // + //-------------------------------------------------------------------------- + + /** Set the minimum TLS protocol version. + + Connections will reject protocol versions older than this. + The default allows TLS 1.2 and newer. + + @param v The minimum protocol version to accept. + + @return Success, or an error if the version is not supported + by the backend. + + @par Example + @code + // Require TLS 1.3 minimum + ctx.set_min_protocol_version( tls::version::tls_1_3 ).value(); + @endcode + + @see set_max_protocol_version + */ + system::result + set_min_protocol_version( version v ); + + /** Set the maximum TLS protocol version. + + Connections will not negotiate protocol versions newer than this. + The default allows the newest supported version. + + @param v The maximum protocol version to accept. + + @return Success, or an error if the version is not supported + by the backend. 
+ + @see set_min_protocol_version + */ + system::result + set_max_protocol_version( version v ); + + /** Set the allowed cipher suites. + + Configures which cipher suites may be used for connections. + The format is backend-specific but typically follows OpenSSL + cipher list syntax. + + @param ciphers The cipher suite specification string. + + @return Success, or an error if the cipher string is invalid. + + @par Example + @code + // TLS 1.2 cipher suites (OpenSSL format) + ctx.set_ciphersuites( "ECDHE+AESGCM:ECDHE+CHACHA20" ).value(); + @endcode + + @note For TLS 1.3, use `set_ciphersuites_tls13()` on backends + that distinguish between TLS 1.2 and 1.3 cipher configuration. + */ + system::result + set_ciphersuites( std::string_view ciphers ); + + /** Set the ALPN protocol list. + + Configures Application-Layer Protocol Negotiation (ALPN) for + the connection. ALPN is used to negotiate which application + protocol to use over the TLS connection (e.g., "h2" for HTTP/2, + "http/1.1" for HTTP/1.1). + + The protocols are tried in preference order (first = highest). + + @param protocols Ordered list of protocol identifiers. + + @return Success, or an error if ALPN configuration fails. + + @par Example + @code + // Prefer HTTP/2, fall back to HTTP/1.1 + ctx.set_alpn( { "h2", "http/1.1" } ).value(); + @endcode + */ + system::result + set_alpn( std::initializer_list protocols ); + + //-------------------------------------------------------------------------- + // + // Certificate Verification + // + //-------------------------------------------------------------------------- + + /** Set the peer certificate verification mode. + + Controls whether and how peer certificates are verified during + the TLS handshake. + + @param mode The verification mode to use. + + @return Success, or an error if the mode could not be set. 
+ + @par Example + @code + // Verify peer certificate (typical for clients) + ctx.set_verify_mode( tls::verify_mode::peer ).value(); + + // Require client certificate (server-side mTLS) + ctx.set_verify_mode( tls::verify_mode::require_peer ).value(); + @endcode + + @see verify_mode + */ + system::result + set_verify_mode( verify_mode mode ); + + /** Set the maximum certificate chain verification depth. + + Limits how many intermediate certificates can appear between + the peer certificate and a trusted root. The default is + typically 100, which is sufficient for most certificate chains. + + @param depth Maximum number of intermediate certificates allowed. + + @return Success, or an error if the depth is invalid. + */ + system::result + set_verify_depth( int depth ); + + /** Set a custom certificate verification callback. + + Installs a callback that is invoked during certificate chain + verification. The callback can perform additional validation + beyond the standard checks and can override verification + results. + + The callback receives the verification result so far and + information about the certificate being verified. Return + `true` to accept the certificate, `false` to reject. + + @tparam Callback A callable with signature + `bool( bool preverified, verify_context& ctx )`. + + @param callback The verification callback. + + @return Success, or an error if the callback could not be set. + + @note The `verify_context` type provides access to the + certificate and chain information. Its exact interface + depends on the TLS backend. + */ + template<class Callback> + system::result + set_verify_callback( Callback callback ); + + /** Set the expected server hostname for verification. + + For client connections, sets the hostname that the server + certificate must match. This enables: + + 1. SNI (Server Name Indication) — tells the server which + certificate to present (for virtual hosting) + 2. 
Hostname verification — validates the certificate's + Subject Alternative Name or Common Name matches + + @param hostname The expected server hostname. + + @par Example + @code + ctx.set_hostname( "api.example.com" ); + @endcode + + @note This is typically required for HTTPS clients to ensure + they're connecting to the intended server. + */ + void + set_hostname( std::string_view hostname ); + + //-------------------------------------------------------------------------- + // + // Revocation Checking + // + //-------------------------------------------------------------------------- + + /** Add a Certificate Revocation List from memory. + + Adds a CRL to the verification store for checking whether + certificates have been revoked. CRLs are typically fetched + from the URLs in a certificate's CRL Distribution Points + extension. + + @param crl The CRL data in DER or PEM format. + + @return Success, or an error if the CRL could not be parsed. + + @see add_crl_file + @see set_revocation_policy + */ + system::result + add_crl( std::string_view crl ); + + /** Add a Certificate Revocation List from a file. + + Adds a CRL to the verification store for checking whether + certificates have been revoked. + + @param filename Path to a CRL file (DER or PEM format). + + @return Success, or an error if the file could not be read + or the CRL is invalid. + + @par Example + @code + ctx.add_crl_file( "issuer.crl" ).value(); + @endcode + + @see add_crl + @see set_revocation_policy + */ + system::result + add_crl_file( std::string_view filename ); + + /** Set the OCSP staple response for server-side stapling. + + For servers, provides a pre-fetched OCSP response to send + to clients during the handshake. This proves the server's + certificate hasn't been revoked without requiring the client + to contact the OCSP responder. + + The OCSP response must be periodically refreshed (typically + every few hours to days) before it expires. + + @param response The DER-encoded OCSP response. 
+ + @return Success, or an error if the response is invalid. + + @note This is a server-side operation. Clients use + `set_require_ocsp_staple()` to require stapled responses. + */ + system::result + set_ocsp_staple( std::string_view response ); + + /** Require OCSP stapling from the server. + + For clients, requires the server to provide a stapled OCSP + response proving its certificate hasn't been revoked. If + the server doesn't provide a stapled response, the handshake + fails. + + @param require Whether to require OCSP stapling. + + @note Not all servers support OCSP stapling. Enable this only + when connecting to servers known to support it. + */ + void + set_require_ocsp_staple( bool require ); + + /** Set the certificate revocation checking policy. + + Controls how certificate revocation status is checked during + verification. This affects both CRL and OCSP checking. + + @param policy The revocation checking policy. + + @par Example + @code + // Require successful revocation check + ctx.set_revocation_policy( tls::revocation_policy::hard_fail ); + + // Check but allow unknown status + ctx.set_revocation_policy( tls::revocation_policy::soft_fail ); + @endcode + + @see revocation_policy + @see add_crl + */ + void + set_revocation_policy( revocation_policy policy ); + + //-------------------------------------------------------------------------- + // + // Password Handling + // + //-------------------------------------------------------------------------- + + /** Set the password callback for encrypted keys. + + Installs a callback that provides passwords for encrypted + private keys and PKCS#12 files. The callback is invoked when + loading encrypted key material. + + @tparam Callback A callable with signature + `std::string( std::size_t max_length, password_purpose purpose )`. + + @param callback The password callback. It receives the maximum + password length and the purpose (reading or writing), and + returns the password string. 
+ + @par Example + @code + ctx.set_password_callback( + []( std::size_t max_len, tls::password_purpose purpose ) + { + // In practice, prompt user or read from secure storage + return std::string( "my-key-password" ); + }); + + // Now load encrypted key + ctx.use_private_key_file( "encrypted.key", tls::file_format::pem ).value(); + @endcode + + @see password_purpose + */ + template + void + set_password_callback( Callback callback ); +}; + +} // namespace tls +} // namespace corosio +} // namespace boost + +#endif diff --git a/include/boost/corosio/tls/openssl_stream.hpp b/include/boost/corosio/tls/openssl_stream.hpp new file mode 100644 index 0000000..6874b52 --- /dev/null +++ b/include/boost/corosio/tls/openssl_stream.hpp @@ -0,0 +1,78 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef BOOST_COROSIO_TLS_OPENSSL_STREAM_HPP +#define BOOST_COROSIO_TLS_OPENSSL_STREAM_HPP + +#include +#include + +namespace boost { +namespace corosio { + +/** A TLS stream using OpenSSL. + + This class wraps an underlying stream derived from @ref io_stream + and provides TLS encryption using the OpenSSL library. + + Inherits handshake(), shutdown(), read_some(), and write_some() + from @ref tls_stream. + + @par Thread Safety + Distinct objects: Safe.@n + Shared objects: Unsafe. 
+ + @par Example + @code + tls::context ctx; + ctx.set_hostname( "example.com" ); + ctx.set_verify_mode( tls::verify_mode::peer ); + + corosio::socket raw_socket( ioc ); + raw_socket.open(); + co_await raw_socket.connect( endpoint ); + + corosio::openssl_stream secure( raw_socket, ctx ); + co_await secure.handshake( openssl_stream::client ); + // Use secure stream for TLS communication + @endcode +*/ +class BOOST_COROSIO_DECL openssl_stream : public tls_stream +{ +public: + /** Construct an OpenSSL stream with default context. + + The underlying stream must remain valid for the lifetime of + this openssl_stream object. + + @param stream Reference to the underlying stream to wrap. + */ + explicit + openssl_stream( io_stream& stream ); + + /** Construct an OpenSSL stream with TLS context. + + The underlying stream must remain valid for the lifetime of + this openssl_stream object. The context's configuration is + captured; subsequent modifications to the context are not + reflected in this stream. + + @param stream Reference to the underlying stream to wrap. + @param ctx The TLS context containing configuration. + */ + openssl_stream( io_stream& stream, tls::context ctx ); + +private: + void construct( tls::context ctx ); +}; + +} // namespace corosio +} // namespace boost + +#endif diff --git a/include/boost/corosio/tls/tls_stream.hpp b/include/boost/corosio/tls/tls_stream.hpp new file mode 100644 index 0000000..64a7042 --- /dev/null +++ b/include/boost/corosio/tls/tls_stream.hpp @@ -0,0 +1,265 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef BOOST_COROSIO_TLS_TLS_STREAM_HPP +#define BOOST_COROSIO_TLS_TLS_STREAM_HPP + +#include +#include +#include +#include + +#include +#include + +namespace boost { +namespace corosio { + +/** Abstract base class for TLS streams. + + This class provides the common interface for TLS stream implementations. + It derives from @ref io_stream to inherit read and write operations, + and adds the TLS-specific handshake and shutdown operations. + + Concrete implementations (e.g., wolfssl_stream, openssl_stream) derive + from this class and provide backend-specific functionality. + + @par Thread Safety + Distinct objects: Safe.@n + Shared objects: Unsafe. +*/ +class BOOST_COROSIO_DECL tls_stream : public io_stream +{ + struct handshake_awaitable + { + tls_stream& stream_; + int type_; + std::stop_token token_; + mutable system::error_code ec_; + + handshake_awaitable( + tls_stream& stream, + int type) noexcept + : stream_(stream) + , type_(type) + { + } + + bool await_ready() const noexcept + { + return token_.stop_requested(); + } + + io_result<> await_resume() const noexcept + { + if(token_.stop_requested()) + return {make_error_code(system::errc::operation_canceled)}; + return {ec_}; + } + + template + auto await_suspend( + std::coroutine_handle<> h, + Ex const& ex) -> std::coroutine_handle<> + { + stream_.get().handshake(h, ex, type_, token_, &ec_); + return std::noop_coroutine(); + } + + template + auto await_suspend( + std::coroutine_handle<> h, + Ex const& ex, + std::stop_token token) -> std::coroutine_handle<> + { + token_ = std::move(token); + stream_.get().handshake(h, ex, type_, token_, &ec_); + return std::noop_coroutine(); + } + }; + + struct shutdown_awaitable + { + tls_stream& stream_; + std::stop_token token_; + mutable system::error_code ec_; + + explicit + shutdown_awaitable(tls_stream& 
stream) noexcept + : stream_(stream) + { + } + + bool await_ready() const noexcept + { + return token_.stop_requested(); + } + + io_result<> await_resume() const noexcept + { + if(token_.stop_requested()) + return {make_error_code(system::errc::operation_canceled)}; + return {ec_}; + } + + template<class Ex> + auto await_suspend( + std::coroutine_handle<> h, + Ex const& ex) -> std::coroutine_handle<> + { + stream_.get().shutdown(h, ex, token_, &ec_); + return std::noop_coroutine(); + } + + template<class Ex> + auto await_suspend( + std::coroutine_handle<> h, + Ex const& ex, + std::stop_token token) -> std::coroutine_handle<> + { + token_ = std::move(token); + stream_.get().shutdown(h, ex, token_, &ec_); + return std::noop_coroutine(); + } + }; + +public: + /** Different handshake types. */ + enum handshake_type + { + /** Perform handshaking as a client. */ + client, + + /** Perform handshaking as a server. */ + server + }; + + /** Perform the TLS handshake asynchronously. + + This function initiates the TLS handshake process. For client + connections, this sends the ClientHello and processes the + server's response. For server connections, this waits for the + ClientHello and sends the server's response. + + The operation supports cancellation via `std::stop_token` through + the affine awaitable protocol. If the associated stop token is + triggered, the operation completes immediately with + `errc::operation_canceled`. + + @param type The type of handshaking to perform (client or server). + + @return An awaitable that completes with `io_result<>`. + Returns success on successful handshake, or an error code + on failure including: + - SSL/TLS errors from the underlying library + - operation_canceled: Cancelled via stop_token + + @par Preconditions + The underlying stream must be connected. + + @par Example + @code + // Client handshake with error code + auto [ec] = co_await secure.handshake(tls_stream::client); + if(ec) { ... 
} + + // Or with exceptions + (co_await secure.handshake(tls_stream::client)).value(); + @endcode + */ + auto handshake(handshake_type type) + { + return handshake_awaitable(*this, type); + } + + /** Perform a graceful TLS shutdown asynchronously. + + This function initiates the TLS shutdown sequence by sending a + close_notify alert and waiting for the peer's close_notify response. + + The operation supports cancellation via `std::stop_token` through + the affine awaitable protocol. If the associated stop token is + triggered, the operation completes immediately with + `errc::operation_canceled`. + + @return An awaitable that completes with `io_result<>`. + Returns success on successful shutdown, or an error code + on failure. + + @par Preconditions + There must be no pending read or write operations on this stream. + The application must ensure all read_some and write_some operations + have completed before calling shutdown. + + @par Example + @code + auto [ec] = co_await secure.shutdown(); + if(ec) { ... } + @endcode + */ + auto shutdown() + { + return shutdown_awaitable(*this); + } + + /** Returns a reference to the underlying stream. + + @return Reference to the wrapped io_stream. + */ + io_stream& next_layer() noexcept + { + return s_; + } + + /** Returns a const reference to the underlying stream. + + @return Const reference to the wrapped io_stream. 
+ */ + io_stream const& next_layer() const noexcept + { + return s_; + } + + struct tls_stream_impl : io_stream_impl + { + virtual void handshake( + std::coroutine_handle<>, + capy::any_executor_ref, + int, + std::stop_token, + system::error_code*) = 0; + + virtual void shutdown( + std::coroutine_handle<>, + capy::any_executor_ref, + std::stop_token, + system::error_code*) = 0; + }; + +protected: + explicit + tls_stream(io_stream& stream) noexcept + : io_stream(stream.context()) + , s_(stream) + { + } + + io_stream& s_; + +private: + tls_stream_impl& get() const noexcept + { + return *static_cast(impl_); + } +}; + +} // namespace corosio +} // namespace boost + +#endif diff --git a/include/boost/corosio/tls/wolfssl_stream.hpp b/include/boost/corosio/tls/wolfssl_stream.hpp new file mode 100644 index 0000000..6a047e8 --- /dev/null +++ b/include/boost/corosio/tls/wolfssl_stream.hpp @@ -0,0 +1,80 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef BOOST_COROSIO_TLS_WOLFSSL_STREAM_HPP +#define BOOST_COROSIO_TLS_WOLFSSL_STREAM_HPP + +#include +#include +#include + +namespace boost { +namespace corosio { + +/** A TLS stream using WolfSSL. + + This class wraps an underlying stream derived from @ref io_stream + and provides TLS encryption using the WolfSSL library. + + Inherits handshake(), shutdown(), read_some(), and write_some() + from @ref tls_stream. + + @par Thread Safety + Distinct objects: Safe.@n + Shared objects: Unsafe. 
+ + @par Example + @code + tls::context ctx; + ctx.set_hostname( "example.com" ); + ctx.set_verify_mode( tls::verify_mode::peer ); + + corosio::socket raw_socket( ioc ); + raw_socket.open(); + co_await raw_socket.connect( endpoint ); + + corosio::wolfssl_stream secure( raw_socket, ctx ); + co_await secure.handshake( wolfssl_stream::client ); + // Use secure stream for TLS communication + @endcode +*/ +class BOOST_COROSIO_DECL + wolfssl_stream : public tls_stream +{ +public: + /** Construct a WolfSSL stream with default context. + + The underlying stream must remain valid for the lifetime of + this wolfssl_stream object. + + @param stream Reference to the underlying stream to wrap. + */ + explicit + wolfssl_stream(io_stream& stream); + + /** Construct a WolfSSL stream with TLS context. + + The underlying stream must remain valid for the lifetime of + this wolfssl_stream object. The context's configuration is + captured; subsequent modifications to the context are not + reflected in this stream. + + @param stream Reference to the underlying stream to wrap. + @param ctx The TLS context containing configuration. + */ + wolfssl_stream(io_stream& stream, tls::context ctx); + +private: + void construct(tls::context ctx); +}; + +} // namespace corosio +} // namespace boost + +#endif diff --git a/include/boost/corosio/wolfssl_stream.hpp b/include/boost/corosio/wolfssl_stream.hpp deleted file mode 100644 index 140f5c9..0000000 --- a/include/boost/corosio/wolfssl_stream.hpp +++ /dev/null @@ -1,176 +0,0 @@ -// -// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// -// Official repository: https://github.com/cppalliance/corosio -// - -#ifndef BOOST_COROSIO_WOLFSSL_STREAM_HPP -#define BOOST_COROSIO_WOLFSSL_STREAM_HPP - -#include -#include -#include -#include - -#include -#include - -namespace boost { -namespace corosio { - -/** A TLS stream using WolfSSL. - - This class wraps an underlying stream derived from @ref io_stream - and provides TLS encryption using the WolfSSL library. - - @par Thread Safety - Distinct objects: Safe.@n - Shared objects: Unsafe. - - @par Example - @code - corosio::socket raw_socket(ioc); - raw_socket.open(); - co_await raw_socket.connect(endpoint); - - corosio::wolfssl_stream secure(raw_socket); - co_await secure.handshake(wolfssl_stream::client); - // Use secure stream for TLS communication - @endcode -*/ -class BOOST_COROSIO_DECL wolfssl_stream : public io_stream -{ - struct handshake_awaitable - { - wolfssl_stream& stream_; - int type_; - std::stop_token token_; - mutable system::error_code ec_; - - handshake_awaitable( - wolfssl_stream& stream, - int type) noexcept - : stream_(stream) - , type_(type) - { - } - - bool await_ready() const noexcept - { - return token_.stop_requested(); - } - - io_result<> await_resume() const noexcept - { - if(token_.stop_requested()) - return {make_error_code(system::errc::operation_canceled)}; - return {ec_}; - } - - template - auto await_suspend( - std::coroutine_handle<> h, - Ex const& ex) -> std::coroutine_handle<> - { - stream_.get().handshake(h, ex, type_, token_, &ec_); - return std::noop_coroutine(); - } - - template - auto await_suspend( - std::coroutine_handle<> h, - Ex const& ex, - std::stop_token token) -> std::coroutine_handle<> - { - token_ = std::move(token); - stream_.get().handshake(h, ex, type_, token_, &ec_); - return std::noop_coroutine(); - } - }; - -public: - /** Different handshake types. 
*/ - enum handshake_type - { - /** Perform handshaking as a client. */ - client, - - /** Perform handshaking as a server. */ - server - }; - - /** Construct a WolfSSL stream wrapping an existing stream. - - The underlying stream must remain valid for the lifetime of - this wolfssl_stream object. - - @param stream Reference to the underlying stream to wrap. - */ - explicit - wolfssl_stream(io_stream& stream); - - /** Perform the TLS handshake asynchronously. - - This function initiates the TLS handshake process. For client - connections, this calls `wolfSSL_connect()`. For server - connections, this calls `wolfSSL_accept()`. - - The operation supports cancellation via `std::stop_token` through - the affine awaitable protocol. If the associated stop token is - triggered, the operation completes immediately with - `errc::operation_canceled`. - - @param type The type of handshaking to perform (client or server). - - @return An awaitable that completes with `io_result<>`. - Returns success on successful handshake, or an error code - on failure including: - - SSL/TLS errors from WolfSSL - - operation_canceled: Cancelled via stop_token - - @par Preconditions - The underlying stream must be connected. - - @par Example - @code - // Client handshake with error code - auto [ec] = co_await secure.handshake(wolfssl_stream::client); - if(ec) { ... 
} - - // Or with exceptions - (co_await secure.handshake(wolfssl_stream::client)).value(); - @endcode - */ - auto handshake(handshake_type type) - { - return handshake_awaitable(*this, type); - } - - struct wolfssl_stream_impl : io_stream_impl - { - virtual void handshake( - std::coroutine_handle<>, - capy::any_executor_ref, - int, - std::stop_token, - system::error_code*) = 0; - }; - -private: - void construct(); - - wolfssl_stream_impl& get() const noexcept - { - return *static_cast(impl_); - } - - io_stream& s_; -}; - -} // namespace corosio -} // namespace boost - -#endif diff --git a/include/boost/corosio/write.hpp b/include/boost/corosio/write.hpp index 39b20ba..f6e4370 100644 --- a/include/boost/corosio/write.hpp +++ b/include/boost/corosio/write.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/corosio/src/detail/epoll/sockets.hpp b/src/corosio/src/detail/epoll/sockets.hpp index 14384ef..f58e69b 100644 --- a/src/corosio/src/detail/epoll/sockets.hpp +++ b/src/corosio/src/detail/epoll/sockets.hpp @@ -72,7 +72,7 @@ class epoll_socket_impl void read_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, system::error_code*, std::size_t*) override; @@ -80,7 +80,7 @@ class epoll_socket_impl void write_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, system::error_code*, std::size_t*) override; @@ -276,7 +276,7 @@ epoll_socket_impl:: read_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, std::size_t* bytes_out) @@ -330,7 +330,7 @@ epoll_socket_impl:: write_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, std::size_t* bytes_out) diff --git a/src/corosio/src/detail/iocp/sockets.cpp 
b/src/corosio/src/detail/iocp/sockets.cpp index 24b6dbf..00ebf2e 100644 --- a/src/corosio/src/detail/iocp/sockets.cpp +++ b/src/corosio/src/detail/iocp/sockets.cpp @@ -249,7 +249,7 @@ win_socket_impl:: read_some( capy::any_coro h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, std::size_t* bytes_out) @@ -310,7 +310,7 @@ win_socket_impl:: write_some( capy::any_coro h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, std::size_t* bytes_out) diff --git a/src/corosio/src/detail/iocp/sockets.hpp b/src/corosio/src/detail/iocp/sockets.hpp index 223211d..4d0e61e 100644 --- a/src/corosio/src/detail/iocp/sockets.hpp +++ b/src/corosio/src/detail/iocp/sockets.hpp @@ -134,7 +134,7 @@ class win_socket_impl void read_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, system::error_code*, std::size_t*) override; @@ -142,7 +142,7 @@ class win_socket_impl void write_some( std::coroutine_handle<>, capy::any_executor_ref, - any_bufref&, + capy::any_bufref&, std::stop_token, system::error_code*, std::size_t*) override; diff --git a/src/corosio/src/tcp_server.cpp b/src/corosio/src/tcp_server.cpp index 9ef03f0..8755bd3 100644 --- a/src/corosio/src/tcp_server.cpp +++ b/src/corosio/src/tcp_server.cpp @@ -12,7 +12,9 @@ namespace boost { namespace corosio { -tcp_server::push_aw::push_aw( +tcp_server:: +push_aw:: +push_aw( tcp_server& self, worker_base& w) noexcept : self_(self) @@ -21,20 +23,24 @@ tcp_server::push_aw::push_aw( } bool -tcp_server::push_aw::await_ready() const noexcept +tcp_server:: +push_aw:: +await_ready() const noexcept { return false; } void -tcp_server::push_aw::await_resume() noexcept +tcp_server:: +push_aw:: +await_resume() noexcept { if(self_.waiters_) { auto* wait = self_.waiters_; self_.waiters_ = wait->next; wait->w = &w_; - self_.post_.post(wait->h); + 
self_.ex_.post(wait->h); } else { @@ -42,14 +48,18 @@ tcp_server::push_aw::await_resume() noexcept } } -tcp_server::pop_aw::pop_aw(tcp_server& self) noexcept +tcp_server:: +pop_aw:: +pop_aw(tcp_server& self) noexcept : self_(self) , wait_{} { } bool -tcp_server::pop_aw::await_ready() const noexcept +tcp_server:: +pop_aw:: +await_ready() const noexcept { return self_.wv_.idle_ != nullptr; } @@ -62,21 +72,23 @@ tcp_server::pop_aw::await_resume() noexcept return *self_.wv_.try_pop(); } -tcp_server::push_aw -tcp_server::push(worker_base& w) +auto +tcp_server:: +push(worker_base& w) -> push_aw { return push_aw{*this, w}; } void -tcp_server::push_sync(worker_base& w) noexcept +tcp_server:: +push_sync(worker_base& w) noexcept { if(waiters_) { auto* wait = waiters_; waiters_ = wait->next; wait->w = &w; - post_.post(wait->h); + ex_.post(wait->h); } else { @@ -100,7 +112,7 @@ tcp_server::do_accept(acceptor& acc) if(rv.has_error()) continue; auto& w = rv.value(); - auto ec = co_await acc.accept(w.sock); + auto [ec] = co_await acc.accept(w.socket()); if(ec) { co_await push(w); @@ -123,7 +135,7 @@ void tcp_server::start() { for(auto& t : ports_) - capy::run_async(post_)(do_accept(t)); + capy::run_async(ex_)(do_accept(t)); } } // namespace corosio diff --git a/src/corosio/src/test/mocket.cpp b/src/corosio/src/test/mocket.cpp index 9e38358..e2a26be 100644 --- a/src/corosio/src/test/mocket.cpp +++ b/src/corosio/src/test/mocket.cpp @@ -90,7 +90,7 @@ class mocket_impl void read_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& buffers, + capy::any_bufref& buffers, std::stop_token token, system::error_code* ec, std::size_t* bytes_transferred) override; @@ -98,7 +98,7 @@ class mocket_impl void write_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& buffers, + capy::any_bufref& buffers, std::stop_token token, system::error_code* ec, std::size_t* bytes_transferred) override; @@ -257,7 +257,7 @@ mocket_impl:: read_some( 
std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& buffers, + capy::any_bufref& buffers, std::stop_token token, system::error_code* ec, std::size_t* bytes_transferred) @@ -310,7 +310,7 @@ mocket_impl:: write_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& buffers, + capy::any_bufref& buffers, std::stop_token token, system::error_code* ec, std::size_t* bytes_transferred) @@ -454,7 +454,7 @@ get_test_port() noexcept { auto port = test_port_base + (next_test_port % test_port_range); ++next_test_port; - return port; + return static_cast(port); } } // namespace diff --git a/src/corosio/src/tls/detail/context_impl.hpp b/src/corosio/src/tls/detail/context_impl.hpp new file mode 100644 index 0000000..cdb0c3d --- /dev/null +++ b/src/corosio/src/tls/detail/context_impl.hpp @@ -0,0 +1,168 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef SRC_TLS_DETAIL_CONTEXT_IMPL_HPP +#define SRC_TLS_DETAIL_CONTEXT_IMPL_HPP + +#include + +#include +#include +#include +#include + +namespace boost { +namespace corosio { +namespace tls { +namespace detail { + +/** Abstract base for cached native SSL contexts. + + Stored in context::impl as an intrusive linked list. + Each TLS backend derives from this to cache its native + context handle ( WOLFSSL_CTX*, SSL_CTX*, etc. ). 
+*/ +class native_context_base +{ +public: + native_context_base* next_ = nullptr; + void const* service_ = nullptr; + + virtual ~native_context_base() = default; +}; + +struct context_data +{ + //-------------------------------------------- + // Credentials + + std::string entity_certificate; + file_format entity_cert_format = file_format::pem; + std::string certificate_chain; + std::string private_key; + file_format private_key_format = file_format::pem; + + //-------------------------------------------- + // Trust anchors + + std::vector ca_certificates; + std::vector verify_paths; + bool use_default_verify_paths = false; + + //-------------------------------------------- + // Protocol settings + + version min_version = version::tls_1_2; + version max_version = version::tls_1_3; + std::string ciphersuites; + std::vector alpn_protocols; + + //-------------------------------------------- + // Verification + + verify_mode verification_mode = verify_mode::none; + int verify_depth = 100; + std::string hostname; + std::function verify_callback; + + //-------------------------------------------- + // Revocation + + std::vector crls; + std::string ocsp_staple; + bool require_ocsp_staple = false; + revocation_policy revocation = revocation_policy::disabled; + + //-------------------------------------------- + // Password + + std::function password_callback; + + //-------------------------------------------- + // Cached native contexts (intrusive list) + + mutable std::mutex native_contexts_mutex_; + mutable detail::native_context_base* native_contexts_ = nullptr; + + /** Find or insert a cached native context. + + @param service The unique key for the backend. + @param create Factory function called if not found. + + @return Pointer to the cached native context. 
+ */ + template + detail::native_context_base* + find( void const* service, Factory&& create ) const + { + std::lock_guard lock( native_contexts_mutex_ ); + + for( auto* p = native_contexts_; p; p = p->next_ ) + if( p->service_ == service ) + return p; + + // Not found - create and prepend + auto* ctx = create(); + ctx->service_ = service; + ctx->next_ = native_contexts_; + native_contexts_ = ctx; + return ctx; + } + + ~context_data() + { + // Clean up cached native contexts (no lock needed - destructor) + while( native_contexts_ ) + { + auto* next = native_contexts_->next_; + delete native_contexts_; + native_contexts_ = next; + } + } +}; + + +} // namespace detail + +//------------------------------------------------------------------------------ + +/** Implementation of tls::context. + + Contains all portable TLS configuration data plus + cached native SSL contexts as an intrusive list. +*/ +struct context::impl : detail::context_data +{ +}; + +//------------------------------------------------------------------------------ + +namespace detail { + +/** Return the TLS context data. + + Provides read-only access to the portable configuration + stored in the context. + + @param ctx The TLS context. + + @return Reference to the context implementation. +*/ +inline context_data const& +get_context_data( context const& ctx ) noexcept +{ + return *ctx.impl_; +} + +} // namespace detail +} // namespace tls +} // namespace corosio +} // namespace boost + +#endif diff --git a/src/openssl/src/openssl.cpp b/src/openssl/src/openssl.cpp new file mode 100644 index 0000000..d64580c --- /dev/null +++ b/src/openssl/src/openssl.cpp @@ -0,0 +1,12 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#include + +// OpenSSL integration sources for boost::corosio diff --git a/src/openssl/src/openssl_stream.cpp b/src/openssl/src/openssl_stream.cpp new file mode 100644 index 0000000..8770932 --- /dev/null +++ b/src/openssl/src/openssl_stream.cpp @@ -0,0 +1,774 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#include +#include +#include +#include +#include + +// Internal context implementation +#include "src/tls/detail/context_impl.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + openssl_stream Architecture + =========================== + + TLS layer wrapping an underlying io_stream. Supports one concurrent + read_some and one concurrent write_some (like Asio's ssl::stream). + + Data Flow (using BIO pairs) + --------------------------- + App -> SSL_write -> int_bio_ -> BIO_read(ext_bio_) -> out_buf_ -> s_.write_some -> Network + App <- SSL_read <- int_bio_ <- BIO_write(ext_bio_) <- in_buf_ <- s_.read_some <- Network + + WANT_READ / WANT_WRITE Pattern + ------------------------------ + OpenSSL's SSL_read/SSL_write return SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE + when they need I/O. Our coroutine handles this by: + + 1. Call SSL_read or SSL_write + 2. Check for pending output in ext_bio_ via BIO_ctrl_pending + 3. If output pending: write to network via s_.write_some + 4. If SSL_ERROR_WANT_READ: read from network into ext_bio_ via s_.read_some + BIO_write + 5. 
Loop back to step 1 + + Renegotiation causes cross-direction I/O: SSL_read may need to write + handshake data, SSL_write may need to read. Each operation handles + whatever I/O direction OpenSSL requests. + + Key Types + --------- + - openssl_stream_impl_ : tls_stream_impl -- the impl stored in io_object::impl_ + - do_read_some, do_write_some -- inner coroutines with WANT_* loops +*/ + +namespace boost { +namespace corosio { + +namespace { + +// Default buffer size for TLS I/O +constexpr std::size_t default_buffer_size = 16384; + +// Maximum number of buffers to handle in a single operation +constexpr std::size_t max_buffers = 8; + +// Buffer array type for coroutine parameters (copied into frame) +using buffer_array = std::array; + +} // namespace + +//------------------------------------------------------------------------------ +// +// Native context caching +// +//------------------------------------------------------------------------------ + +namespace tls { +namespace detail { + +/** Cached OpenSSL context owning SSL_CTX. + + Created on first stream construction for a given tls::context, + then reused for subsequent streams sharing that context. 
+*/ +class openssl_native_context + : public native_context_base +{ +public: + SSL_CTX* ctx_; + + explicit + openssl_native_context( context_data const& cd ) + : ctx_( nullptr ) + { + // Create SSL_CTX for TLS client (auto-negotiate best version) + ctx_ = SSL_CTX_new( TLS_client_method() ); + if( !ctx_ ) + return; + + // Set modes for partial writes and moving buffers + SSL_CTX_set_mode( ctx_, SSL_MODE_ENABLE_PARTIAL_WRITE ); + SSL_CTX_set_mode( ctx_, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER ); +#if defined( SSL_MODE_RELEASE_BUFFERS ) + SSL_CTX_set_mode( ctx_, SSL_MODE_RELEASE_BUFFERS ); +#endif + + // Apply verify mode from config + int verify_mode_flag = SSL_VERIFY_NONE; + if( cd.verification_mode == verify_mode::peer ) + verify_mode_flag = SSL_VERIFY_PEER; + else if( cd.verification_mode == verify_mode::require_peer ) + verify_mode_flag = SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT; + SSL_CTX_set_verify( ctx_, verify_mode_flag, nullptr ); + + // Apply certificates if provided + if( !cd.entity_certificate.empty() ) + { + BIO* bio = BIO_new_mem_buf( + cd.entity_certificate.data(), + static_cast( cd.entity_certificate.size() ) ); + if( bio ) + { + X509* cert = nullptr; + if( cd.entity_cert_format == file_format::pem ) + cert = PEM_read_bio_X509( bio, nullptr, nullptr, nullptr ); + else + cert = d2i_X509_bio( bio, nullptr ); + if( cert ) + { + SSL_CTX_use_certificate( ctx_, cert ); + X509_free( cert ); + } + BIO_free( bio ); + } + } + + // Apply private key if provided + if( !cd.private_key.empty() ) + { + BIO* bio = BIO_new_mem_buf( + cd.private_key.data(), + static_cast( cd.private_key.size() ) ); + if( bio ) + { + EVP_PKEY* pkey = nullptr; + if( cd.private_key_format == file_format::pem ) + pkey = PEM_read_bio_PrivateKey( bio, nullptr, nullptr, nullptr ); + else + pkey = d2i_PrivateKey_bio( bio, nullptr ); + if( pkey ) + { + SSL_CTX_use_PrivateKey( ctx_, pkey ); + EVP_PKEY_free( pkey ); + } + BIO_free( bio ); + } + } + + // Apply CA certificates for verification 
+ X509_STORE* store = SSL_CTX_get_cert_store( ctx_ ); + for( auto const& ca : cd.ca_certificates ) + { + BIO* bio = BIO_new_mem_buf( ca.data(), static_cast( ca.size() ) ); + if( bio ) + { + X509* cert = PEM_read_bio_X509( bio, nullptr, nullptr, nullptr ); + if( cert ) + { + X509_STORE_add_cert( store, cert ); + X509_free( cert ); + } + BIO_free( bio ); + } + } + + // Apply verify depth + SSL_CTX_set_verify_depth( ctx_, cd.verify_depth ); + } + + ~openssl_native_context() override + { + if( ctx_ ) + SSL_CTX_free( ctx_ ); + } +}; + +/** Get or create cached SSL_CTX for this context. + + @param cd The context implementation. + + @return Pointer to the cached SSL_CTX. +*/ +inline SSL_CTX* +get_openssl_context( context_data const& cd ) +{ + static char key; + auto* p = cd.find( &key, [&] + { + return new openssl_native_context( cd ); + }); + return static_cast( p )->ctx_; +} + +} // namespace detail +} // namespace tls + +//------------------------------------------------------------------------------ + +struct openssl_stream_impl_ + : tls_stream::tls_stream_impl +{ + io_stream& s_; + tls::context ctx_; // holds ref to cached native context + SSL* ssl_ = nullptr; + BIO* ext_bio_ = nullptr; + + // Buffers for network I/O + std::vector in_buf_; + std::vector out_buf_; + + // Renegotiation can cause both TLS read/write to access the socket + capy::async_mutex io_mutex_; + + //-------------------------------------------------------------------------- + + openssl_stream_impl_( io_stream& s, tls::context ctx ) + : s_( s ) + , ctx_( std::move( ctx ) ) + { + in_buf_.resize( default_buffer_size ); + out_buf_.resize( default_buffer_size ); + } + + ~openssl_stream_impl_() + { + if( ext_bio_ ) + BIO_free( ext_bio_ ); + if( ssl_ ) + SSL_free( ssl_ ); + // SSL_CTX* is owned by cached native context, not freed here + } + + //-------------------------------------------------------------------------- + // Helper to flush pending output from BIO to network + 
//-------------------------------------------------------------------------- + + capy::task + flush_output() + { + while(BIO_ctrl_pending(ext_bio_) > 0) + { + int pending = static_cast(BIO_ctrl_pending(ext_bio_)); + int to_read = (std::min)(pending, static_cast(out_buf_.size())); + int n = BIO_read(ext_bio_, out_buf_.data(), to_read); + if(n <= 0) + break; + + // Write to underlying stream + auto guard = co_await io_mutex_.scoped_lock(); + auto [ec, written] = co_await s_.write_some( + capy::mutable_buffer(out_buf_.data(), static_cast(n))); + if(ec) + co_return ec; + } + co_return system::error_code{}; + } + + capy::task + read_input() + { + auto guard = co_await io_mutex_.scoped_lock(); + auto [ec, n] = co_await s_.read_some( + capy::mutable_buffer(in_buf_.data(), in_buf_.size())); + if(ec) + co_return ec; + + // Feed data into OpenSSL + int written = BIO_write(ext_bio_, in_buf_.data(), static_cast(n)); + (void)written; + + co_return system::error_code{}; + } + + //-------------------------------------------------------------------------- + // Inner coroutines for TLS read/write operations + //-------------------------------------------------------------------------- + + capy::task<> + do_read_some( + buffer_array dest_bufs, + std::size_t buf_count, + std::stop_token token, + system::error_code* ec_out, + std::size_t* bytes_out, + std::coroutine_handle<> continuation, + capy::any_executor_ref d) + { + system::error_code ec; + std::size_t total_read = 0; + + // Process each destination buffer + for(std::size_t i = 0; i < buf_count && !token.stop_requested(); ++i) + { + char* dest = static_cast(dest_bufs[i].data()); + int remaining = static_cast(dest_bufs[i].size()); + + while(remaining > 0 && !token.stop_requested()) + { + ERR_clear_error(); + int ret = SSL_read(ssl_, dest, remaining); + + if(ret > 0) + { + dest += ret; + remaining -= ret; + total_read += static_cast(ret); + + // For read_some semantics, return after first successful read + if(total_read > 0) + 
goto done; + } + else + { + int err = SSL_get_error(ssl_, ret); + + if(err == SSL_ERROR_WANT_WRITE) + { + // Flush pending output (renegotiation) + ec = co_await flush_output(); + if(ec) + goto done; + } + else if(err == SSL_ERROR_WANT_READ) + { + // First flush any pending output + ec = co_await flush_output(); + if(ec) + goto done; + + // Then read from network + ec = co_await read_input(); + if(ec) + { + if(ec == make_error_code(capy::error::eof)) + { + // Check if we got a proper shutdown + if(SSL_get_shutdown(ssl_) & SSL_RECEIVED_SHUTDOWN) + ec = make_error_code(capy::error::eof); + } + goto done; + } + } + else if(err == SSL_ERROR_ZERO_RETURN) + { + ec = make_error_code(capy::error::eof); + goto done; + } + else if(err == SSL_ERROR_SYSCALL) + { + unsigned long ssl_err = ERR_get_error(); + if(ssl_err == 0) + ec = make_error_code(capy::error::eof); + else + ec = system::error_code( + static_cast(ssl_err), system::system_category()); + goto done; + } + else + { + unsigned long ssl_err = ERR_get_error(); + ec = system::error_code( + static_cast(ssl_err), system::system_category()); + goto done; + } + } + } + } + + done: + if(token.stop_requested()) + ec = make_error_code(system::errc::operation_canceled); + + *ec_out = ec; + *bytes_out = total_read; + + d.dispatch(capy::any_coro{continuation}).resume(); + co_return; + } + + capy::task<> + do_write_some( + buffer_array src_bufs, + std::size_t buf_count, + std::stop_token token, + system::error_code* ec_out, + std::size_t* bytes_out, + std::coroutine_handle<> continuation, + capy::any_executor_ref d) + { + system::error_code ec; + std::size_t total_written = 0; + + // Process each source buffer + for(std::size_t i = 0; i < buf_count && !token.stop_requested(); ++i) + { + char const* src = static_cast(src_bufs[i].data()); + int remaining = static_cast(src_bufs[i].size()); + + while(remaining > 0 && !token.stop_requested()) + { + ERR_clear_error(); + int ret = SSL_write(ssl_, src, remaining); + + if(ret > 0) + { + 
src += ret; + remaining -= ret; + total_written += static_cast(ret); + + // For write_some semantics, flush and return after first successful write + if(total_written > 0) + { + ec = co_await flush_output(); + goto done; + } + } + else + { + int err = SSL_get_error(ssl_, ret); + + if(err == SSL_ERROR_WANT_WRITE) + { + ec = co_await flush_output(); + if(ec) + goto done; + } + else if(err == SSL_ERROR_WANT_READ) + { + // Renegotiation - flush then read + ec = co_await flush_output(); + if(ec) + goto done; + + ec = co_await read_input(); + if(ec) + goto done; + } + else + { + unsigned long ssl_err = ERR_get_error(); + ec = system::error_code( + static_cast(ssl_err), system::system_category()); + goto done; + } + } + } + } + + done: + if(token.stop_requested()) + ec = make_error_code(system::errc::operation_canceled); + + *ec_out = ec; + *bytes_out = total_written; + + d.dispatch(capy::any_coro{continuation}).resume(); + co_return; + } + + capy::task<> + do_handshake( + int type, + std::stop_token token, + system::error_code* ec_out, + std::coroutine_handle<> continuation, + capy::any_executor_ref d) + { + system::error_code ec; + + while(!token.stop_requested()) + { + ERR_clear_error(); + int ret; + if(type == openssl_stream::client) + ret = SSL_connect(ssl_); + else + ret = SSL_accept(ssl_); + + if(ret == 1) + { + // Handshake completed - flush any remaining output + ec = co_await flush_output(); + break; + } + else + { + int err = SSL_get_error(ssl_, ret); + + if(err == SSL_ERROR_WANT_WRITE) + { + ec = co_await flush_output(); + if(ec) + break; + } + else if(err == SSL_ERROR_WANT_READ) + { + // Flush output first (e.g., ClientHello) + ec = co_await flush_output(); + if(ec) + break; + + // Then read response + ec = co_await read_input(); + if(ec) + break; + } + else + { + unsigned long ssl_err = ERR_get_error(); + ec = system::error_code( + static_cast(ssl_err), system::system_category()); + break; + } + } + } + + if(token.stop_requested()) + ec = 
make_error_code(system::errc::operation_canceled); + + *ec_out = ec; + + d.dispatch(capy::any_coro{continuation}).resume(); + co_return; + } + + capy::task<> + do_shutdown( + std::stop_token token, + system::error_code* ec_out, + std::coroutine_handle<> continuation, + capy::any_executor_ref d) + { + system::error_code ec; + + while(!token.stop_requested()) + { + ERR_clear_error(); + int ret = SSL_shutdown(ssl_); + + if(ret == 1) + { + // Bidirectional shutdown complete + ec = co_await flush_output(); + break; + } + else if(ret == 0) + { + // Sent close_notify, need to receive peer's + ec = co_await flush_output(); + if(ec) + break; + + // Continue to receive peer's close_notify + ec = co_await read_input(); + if(ec) + { + // EOF is expected during shutdown + if(ec == make_error_code(capy::error::eof)) + ec = {}; + break; + } + } + else + { + int err = SSL_get_error(ssl_, ret); + + if(err == SSL_ERROR_WANT_WRITE) + { + ec = co_await flush_output(); + if(ec) + break; + } + else if(err == SSL_ERROR_WANT_READ) + { + ec = co_await flush_output(); + if(ec) + break; + + ec = co_await read_input(); + if(ec) + { + if(ec == make_error_code(capy::error::eof)) + ec = {}; + break; + } + } + else + { + unsigned long ssl_err = ERR_get_error(); + if(ssl_err == 0 && err == SSL_ERROR_SYSCALL) + { + // Connection closed without close_notify - acceptable + ec = {}; + } + else + { + ec = system::error_code( + static_cast(ssl_err), system::system_category()); + } + break; + } + } + } + + if(token.stop_requested()) + ec = make_error_code(system::errc::operation_canceled); + + *ec_out = ec; + + d.dispatch(capy::any_coro{continuation}).resume(); + co_return; + } + + //-------------------------------------------------------------------------- + // io_stream_impl interface + //-------------------------------------------------------------------------- + + void release() override + { + delete this; + } + + void read_some( + std::coroutine_handle<> h, + capy::any_executor_ref d, + 
capy::any_bufref& param, + std::stop_token token, + system::error_code* ec, + std::size_t* bytes) override + { + buffer_array bufs{}; + std::size_t count = param.copy_to(bufs.data(), max_buffers); + + capy::run_async(d)( + do_read_some(bufs, count, token, ec, bytes, h, d)); + } + + void write_some( + std::coroutine_handle<> h, + capy::any_executor_ref d, + capy::any_bufref& param, + std::stop_token token, + system::error_code* ec, + std::size_t* bytes) override + { + buffer_array bufs{}; + std::size_t count = param.copy_to(bufs.data(), max_buffers); + + capy::run_async(d)( + do_write_some(bufs, count, token, ec, bytes, h, d)); + } + + void handshake( + std::coroutine_handle<> h, + capy::any_executor_ref d, + int type, + std::stop_token token, + system::error_code* ec) override + { + capy::run_async(d)( + do_handshake(type, token, ec, h, d)); + } + + void shutdown( + std::coroutine_handle<> h, + capy::any_executor_ref d, + std::stop_token token, + system::error_code* ec) override + { + capy::run_async(d)( + do_shutdown(token, ec, h, d)); + } + + //-------------------------------------------------------------------------- + // Initialization + //-------------------------------------------------------------------------- + + system::error_code + init_ssl() + { + // Get cached SSL_CTX from tls::context + auto& impl = tls::detail::get_context_data( ctx_ ); + SSL_CTX* native_ctx = tls::detail::get_openssl_context( impl ); + if( !native_ctx ) + { + unsigned long err = ERR_get_error(); + return system::error_code( + static_cast( err ), system::system_category() ); + } + + // Create SSL session from cached context + ssl_ = SSL_new( native_ctx ); + if( !ssl_ ) + { + unsigned long err = ERR_get_error(); + return system::error_code( + static_cast( err ), system::system_category() ); + } + + // Create BIO pair for I/O + BIO* int_bio = nullptr; + if( !BIO_new_bio_pair( &int_bio, 0, &ext_bio_, 0 ) ) + { + unsigned long err = ERR_get_error(); + SSL_free( ssl_ ); + ssl_ = nullptr; + 
return system::error_code( + static_cast( err ), system::system_category() ); + } + + // Attach internal BIO to SSL (SSL takes ownership) + SSL_set_bio( ssl_, int_bio, int_bio ); + + // Apply per-session config (SNI) from context + if( !impl.hostname.empty() ) + { + SSL_set_tlsext_host_name( ssl_, impl.hostname.c_str() ); + } + + return {}; + } +}; + +//------------------------------------------------------------------------------ + +openssl_stream:: +openssl_stream( io_stream& stream ) + : tls_stream( stream ) +{ + construct( tls::context() ); +} + +openssl_stream:: +openssl_stream( io_stream& stream, tls::context ctx ) + : tls_stream( stream ) +{ + construct( std::move( ctx ) ); +} + +void +openssl_stream:: +construct( tls::context ctx ) +{ + auto* impl = new openssl_stream_impl_( s_, std::move( ctx ) ); + + auto ec = impl->init_ssl(); + if( ec ) + { + delete impl; + return; + } + + impl_ = impl; +} + +} // namespace corosio +} // namespace boost diff --git a/src/wolfssl/src/wolfssl_stream.cpp b/src/wolfssl/src/wolfssl_stream.cpp index 2aaae4d..2e07dc2 100644 --- a/src/wolfssl/src/wolfssl_stream.cpp +++ b/src/wolfssl/src/wolfssl_stream.cpp @@ -7,12 +7,15 @@ // Official repository: https://github.com/cppalliance/corosio // -#include +#include #include #include #include #include +// Internal context implementation +#include "src/tls/detail/context_impl.hpp" + // Include WolfSSL options first to get proper feature detection #include #include @@ -53,9 +56,9 @@ Key Types --------- - - wolfssl_stream_impl : io_stream_impl -- the impl stored in io_object::impl_ - - recv_callback, send_callback -- WolfSSL I/O hooks (static) - - do_read_some, do_write_some -- inner coroutines with WANT_* loops + - wolfssl_stream_impl_ : tls_stream_impl -- the impl stored in io_object::impl_ + - recv_callback, send_callback -- WolfSSL I/O hooks (static) + - do_read_some, do_write_some -- inner coroutines with WANT_* loops */ namespace boost { @@ -74,13 +77,112 @@ using buffer_array = 
std::array; } // namespace +//------------------------------------------------------------------------------ +// +// Native context caching +// +//------------------------------------------------------------------------------ + +namespace tls { +namespace detail { + +/** Cached WolfSSL context owning WOLFSSL_CTX. + + Created on first stream construction for a given tls::context, + then reused for subsequent streams sharing that context. +*/ +class wolfssl_native_context + : public native_context_base +{ +public: + WOLFSSL_CTX* ctx_; + + explicit + wolfssl_native_context( context_data const& cd ) + : ctx_( nullptr ) + { + // Create WOLFSSL_CTX for TLS client (auto-negotiate best version) + ctx_ = wolfSSL_CTX_new( wolfTLS_client_method() ); + if( !ctx_ ) + return; + + // Apply verify mode from config + int verify_mode_flag = WOLFSSL_VERIFY_NONE; + if( cd.verification_mode == verify_mode::peer ) + verify_mode_flag = WOLFSSL_VERIFY_PEER; + else if( cd.verification_mode == verify_mode::require_peer ) + verify_mode_flag = WOLFSSL_VERIFY_PEER | WOLFSSL_VERIFY_FAIL_IF_NO_PEER_CERT; + wolfSSL_CTX_set_verify( ctx_, verify_mode_flag, nullptr ); + + // Apply certificates if provided + if( !cd.entity_certificate.empty() ) + { + int format = ( cd.entity_cert_format == file_format::pem ) + ? WOLFSSL_FILETYPE_PEM : WOLFSSL_FILETYPE_ASN1; + wolfSSL_CTX_use_certificate_buffer( ctx_, + reinterpret_cast( cd.entity_certificate.data() ), + static_cast( cd.entity_certificate.size() ), + format ); + } + + // Apply private key if provided + if( !cd.private_key.empty() ) + { + int format = ( cd.private_key_format == file_format::pem ) + ? 
WOLFSSL_FILETYPE_PEM : WOLFSSL_FILETYPE_ASN1; + wolfSSL_CTX_use_PrivateKey_buffer( ctx_, + reinterpret_cast( cd.private_key.data() ), + static_cast( cd.private_key.size() ), + format ); + } + + // Apply CA certificates for verification + for( auto const& ca : cd.ca_certificates ) + { + wolfSSL_CTX_load_verify_buffer( ctx_, + reinterpret_cast( ca.data() ), + static_cast( ca.size() ), + WOLFSSL_FILETYPE_PEM ); + } + + // Apply verify depth + wolfSSL_CTX_set_verify_depth( ctx_, cd.verify_depth ); + } + + ~wolfssl_native_context() override + { + if( ctx_ ) + wolfSSL_CTX_free( ctx_ ); + } +}; + +/** Get or create cached WOLFSSL_CTX for this context. + + @param cd The context implementation. + + @return Pointer to the cached WOLFSSL_CTX. +*/ +inline WOLFSSL_CTX* +get_wolfssl_context( context_data const& cd ) +{ + static char key; + auto* p = cd.find( &key, [&] + { + return new wolfssl_native_context( cd ); + }); + return static_cast( p )->ctx_; +} + +} // namespace detail +} // namespace tls + //------------------------------------------------------------------------------ struct wolfssl_stream_impl_ - : wolfssl_stream::wolfssl_stream_impl + : tls_stream::tls_stream_impl { io_stream& s_; - WOLFSSL_CTX* ctx_ = nullptr; + tls::context ctx_; // holds ref to cached native context WOLFSSL* ssl_ = nullptr; // Buffers for read operations (used by do_read_some) @@ -116,22 +218,21 @@ struct wolfssl_stream_impl_ //-------------------------------------------------------------------------- - explicit - wolfssl_stream_impl_(io_stream& s) - : s_(s) + wolfssl_stream_impl_( io_stream& s, tls::context ctx ) + : s_( s ) + , ctx_( std::move( ctx ) ) { - read_in_buf_.resize(default_buffer_size); - read_out_buf_.resize(default_buffer_size); - write_in_buf_.resize(default_buffer_size); - write_out_buf_.resize(default_buffer_size); + read_in_buf_.resize( default_buffer_size ); + read_out_buf_.resize( default_buffer_size ); + write_in_buf_.resize( default_buffer_size ); + write_out_buf_.resize( 
default_buffer_size ); } ~wolfssl_stream_impl_() { - if(ssl_) - wolfSSL_free(ssl_); - if(ctx_) - wolfSSL_CTX_free(ctx_); + if( ssl_ ) + wolfSSL_free( ssl_ ); + // WOLFSSL_CTX* is owned by cached native context, not freed here } //-------------------------------------------------------------------------- @@ -570,6 +671,139 @@ struct wolfssl_stream_impl_ co_return; } + /** Inner coroutine that performs TLS shutdown with WANT_READ/WANT_WRITE loop. + + Calls wolfSSL_shutdown in a loop, performing async I/O on the + underlying stream when needed. + */ + capy::task<> + do_shutdown( + std::stop_token token, + system::error_code* ec_out, + std::coroutine_handle<> continuation, + capy::any_executor_ref d) + { + system::error_code ec; + + // Set up operation buffers for callbacks (use read buffers for shutdown) + op_buffers op{ + &read_in_buf_, &read_in_pos_, &read_in_len_, + &read_out_buf_, &read_out_len_, + false, false + }; + current_op_ = &op; + + while(!token.stop_requested()) + { + op.want_read = false; + op.want_write = false; + + int ret = wolfSSL_shutdown(ssl_); + + if(ret == WOLFSSL_SUCCESS) + { + // Shutdown completed successfully + // Flush any remaining output + while(read_out_len_ > 0) + { + capy::mutable_buffer buf(read_out_buf_.data(), read_out_len_); + auto [wec, wn] = co_await do_underlying_write(buf); + if(wec) + { + ec = wec; + break; + } + if(wn < read_out_len_) + std::memmove(read_out_buf_.data(), read_out_buf_.data() + wn, read_out_len_ - wn); + read_out_len_ -= wn; + } + break; + } + else if(ret == WOLFSSL_SHUTDOWN_NOT_DONE) + { + int err = wolfSSL_get_error(ssl_, ret); + + if(err == WOLFSSL_ERROR_WANT_READ) + { + // Flush any pending output first + while(read_out_len_ > 0) + { + capy::mutable_buffer buf(read_out_buf_.data(), read_out_len_); + auto [wec, wn] = co_await do_underlying_write(buf); + if(wec) + { + ec = wec; + goto exit_shutdown; + } + if(wn < read_out_len_) + std::memmove(read_out_buf_.data(), read_out_buf_.data() + wn, read_out_len_ - 
wn); + read_out_len_ -= wn; + } + + if(read_in_pos_ == read_in_len_) + { + read_in_pos_ = 0; + read_in_len_ = 0; + } + capy::mutable_buffer buf( + read_in_buf_.data() + read_in_len_, + read_in_buf_.size() - read_in_len_); + auto [rec, rn] = co_await do_underlying_read(buf); + if(rec) + { + // EOF from peer is expected during shutdown + if(rec == make_error_code(capy::error::eof)) + break; + ec = rec; + break; + } + read_in_len_ += rn; + } + else if(err == WOLFSSL_ERROR_WANT_WRITE) + { + while(read_out_len_ > 0) + { + capy::mutable_buffer buf(read_out_buf_.data(), read_out_len_); + auto [wec, wn] = co_await do_underlying_write(buf); + if(wec) + { + ec = wec; + goto exit_shutdown; + } + if(wn < read_out_len_) + std::memmove(read_out_buf_.data(), read_out_buf_.data() + wn, read_out_len_ - wn); + read_out_len_ -= wn; + } + } + else + { + // Other error + ec = system::error_code(err, system::system_category()); + break; + } + } + else + { + // SSL_FATAL_ERROR + int err = wolfSSL_get_error(ssl_, ret); + ec = system::error_code(err, system::system_category()); + break; + } + } + + exit_shutdown: + current_op_ = nullptr; + + if(token.stop_requested()) + ec = make_error_code(system::errc::operation_canceled); + + *ec_out = ec; + + // Resume the original caller via executor + d.dispatch(capy::any_coro{continuation}).resume(); + co_return; + } + //-------------------------------------------------------------------------- // io_stream_impl interface //-------------------------------------------------------------------------- @@ -582,7 +816,7 @@ struct wolfssl_stream_impl_ void read_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, std::size_t* bytes) override @@ -600,7 +834,7 @@ struct wolfssl_stream_impl_ void write_some( std::coroutine_handle<> h, capy::any_executor_ref d, - any_bufref& param, + capy::any_bufref& param, std::stop_token token, system::error_code* ec, 
std::size_t* bytes) override @@ -627,6 +861,17 @@ struct wolfssl_stream_impl_ do_handshake(type, token, ec, h, d)); } + void shutdown( + std::coroutine_handle<> h, + capy::any_executor_ref d, + std::stop_token token, + system::error_code* ec) override + { + // Launch inner coroutine via run_async + capy::run_async(d)( + do_shutdown(token, ec, h, d)); + } + //-------------------------------------------------------------------------- // Initialization //-------------------------------------------------------------------------- @@ -634,41 +879,39 @@ struct wolfssl_stream_impl_ system::error_code init_ssl() { - // Create WolfSSL context for TLS client (auto-negotiate best version) - ctx_ = wolfSSL_CTX_new(wolfTLS_client_method()); - if(!ctx_) + // Get cached WOLFSSL_CTX from tls::context + auto& impl = tls::detail::get_context_data( ctx_ ); + WOLFSSL_CTX* native_ctx = tls::detail::get_wolfssl_context( impl ); + if( !native_ctx ) { return system::error_code( - wolfSSL_get_error(nullptr, 0), - system::system_category()); + wolfSSL_get_error( nullptr, 0 ), + system::system_category() ); } - // Disable certificate verification for now (TODO: make configurable) - // This is needed when connecting without proper CA certificates - wolfSSL_CTX_set_verify(ctx_, WOLFSSL_VERIFY_NONE, nullptr); - - // Create SSL session - ssl_ = wolfSSL_new(ctx_); - if(!ssl_) + // Create SSL session from cached context + ssl_ = wolfSSL_new( native_ctx ); + if( !ssl_ ) { - int err = wolfSSL_get_error(nullptr, 0); - wolfSSL_CTX_free(ctx_); - ctx_ = nullptr; - return system::error_code(err, system::system_category()); + int err = wolfSSL_get_error( nullptr, 0 ); + return system::error_code( err, system::system_category() ); } // Set custom I/O callbacks - wolfSSL_SSLSetIORecv(ssl_, &recv_callback); - wolfSSL_SSLSetIOSend(ssl_, &send_callback); + wolfSSL_SSLSetIORecv( ssl_, &recv_callback ); + wolfSSL_SSLSetIOSend( ssl_, &send_callback ); // Set this impl as the I/O context - 
wolfSSL_SetIOReadCtx(ssl_, this); - wolfSSL_SetIOWriteCtx(ssl_, this); + wolfSSL_SetIOReadCtx( ssl_, this ); + wolfSSL_SetIOWriteCtx( ssl_, this ); - // Set SNI (Server Name Indication) - required by most modern TLS servers - // TODO: Make this configurable via API - // WOLFSSL_SNI_HOST_NAME = 0 - wolfSSL_UseSNI(ssl_, 0, "www.boost.org", 13); + // Apply per-session config (SNI) from context + if( !impl.hostname.empty() ) + { + wolfSSL_UseSNI( ssl_, WOLFSSL_SNI_HOST_NAME, + impl.hostname.data(), + static_cast( impl.hostname.size() ) ); + } return {}; } @@ -677,22 +920,28 @@ struct wolfssl_stream_impl_ //------------------------------------------------------------------------------ wolfssl_stream:: -wolfssl_stream(io_stream& stream) - : io_stream(stream.context()) - , s_(stream) +wolfssl_stream( io_stream& stream ) + : tls_stream( stream ) +{ + construct( tls::context() ); +} + +wolfssl_stream:: +wolfssl_stream( io_stream& stream, tls::context ctx ) + : tls_stream( stream ) { - construct(); + construct( std::move( ctx ) ); } void wolfssl_stream:: -construct() +construct( tls::context ctx ) { - auto* impl = new wolfssl_stream_impl_(s_); + auto* impl = new wolfssl_stream_impl_( s_, std::move( ctx ) ); - // Initialize WolfSSL + // Initialize WolfSSL using cached context auto ec = impl->init_ssl(); - if(ec) + if( ec ) { delete impl; // For now, silently fail - could throw or store error diff --git a/test/unit/io_result.cpp b/test/unit/io_result.cpp index 784fbc6..fee893c 100644 --- a/test/unit/io_result.cpp +++ b/test/unit/io_result.cpp @@ -29,12 +29,10 @@ struct io_result_test // Default construction io_result<> r1; BOOST_TEST(!r1.ec); - BOOST_TEST(r1); // With error io_result<> r2{make_error_code(system::errc::invalid_argument)}; BOOST_TEST(r2.ec); - BOOST_TEST(!r2); // Structured binding auto [ec] = r1; @@ -54,20 +52,17 @@ struct io_result_test io_result r1; BOOST_TEST(!r1.ec); BOOST_TEST_EQ(r1.n, 0u); - BOOST_TEST(r1); // With values io_result r2{{}, 42}; 
BOOST_TEST(!r2.ec); BOOST_TEST_EQ(r2.n, 42u); - BOOST_TEST(r2); // With error io_result r3{ make_error_code(system::errc::invalid_argument), 10}; BOOST_TEST(r3.ec); BOOST_TEST_EQ(r3.n, 10u); - BOOST_TEST(!r3); // Structured binding auto [ec, n] = r2; @@ -88,7 +83,6 @@ struct io_result_test io_result r1{{}, "hello"}; BOOST_TEST(!r1.ec); BOOST_TEST_EQ(r1.value_, "hello"); - BOOST_TEST(r1); // Structured binding auto [ec, v] = r1; @@ -101,7 +95,7 @@ struct io_result_test // With error io_result r2{ make_error_code(system::errc::invalid_argument), "error"}; - BOOST_TEST(!r2); + BOOST_TEST(r2.ec); BOOST_TEST_THROWS(r2.value(), boost::system::system_error); } @@ -112,7 +106,6 @@ struct io_result_test io_result r1{ {}, std::make_tuple(42, 3.14, std::string("test"))}; BOOST_TEST(!r1.ec); - BOOST_TEST(r1); // Structured binding auto [ec, a, b, c] = r1; @@ -131,7 +124,7 @@ struct io_result_test io_result r2{ make_error_code(system::errc::invalid_argument), std::make_tuple(0, 0.0)}; - BOOST_TEST(!r2); + BOOST_TEST(r2.ec); BOOST_TEST_THROWS(r2.value(), boost::system::system_error); } diff --git a/test/unit/signal_set.cpp b/test/unit/signal_set.cpp index e4c50e1..418b656 100644 --- a/test/unit/signal_set.cpp +++ b/test/unit/signal_set.cpp @@ -522,7 +522,7 @@ struct signal_set_test std::raise(SIGINT); auto result = co_await s_ref.async_wait(); - ok_out = static_cast(result); + ok_out = !result.ec; }(s, t, result_ok)); ioc.run(); @@ -543,7 +543,7 @@ struct signal_set_test [](signal_set& s_ref, bool& ok_out, system::error_code& ec_out) -> capy::task<> { auto result = co_await s_ref.async_wait(); - ok_out = static_cast(result); + ok_out = !result.ec; ec_out = result.ec; }(s, result_ok, result_ec)); diff --git a/test/unit/timer.cpp b/test/unit/timer.cpp index 480a09c..0642e83 100644 --- a/test/unit/timer.cpp +++ b/test/unit/timer.cpp @@ -504,7 +504,7 @@ struct timer_test [](timer& t_ref, bool& ok_out) -> capy::task<> { auto result = co_await t_ref.wait(); - ok_out = 
static_cast(result); + ok_out = !result.ec; }(t, result_ok)); ioc.run(); @@ -528,7 +528,7 @@ struct timer_test [](timer& t_ref, bool& ok_out, system::error_code& ec_out) -> capy::task<> { auto result = co_await t_ref.wait(); - ok_out = static_cast(result); + ok_out = !result.ec; ec_out = result.ec; }(t, result_ok, result_ec)); diff --git a/test/unit/tls/tls_stream.cpp b/test/unit/tls/tls_stream.cpp new file mode 100644 index 0000000..fbb5072 --- /dev/null +++ b/test/unit/tls/tls_stream.cpp @@ -0,0 +1,29 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +// Test that header file is self-contained. +#include + +#include "test_suite.hpp" + +namespace boost { +namespace corosio { + +struct tls_stream_test +{ + void + run() + { + } +}; + +TEST_SUITE(tls_stream_test, "boost.corosio.tls_stream"); + +} // namespace corosio +} // namespace boost diff --git a/test/unit/tls/wolfssl_stream.cpp b/test/unit/tls/wolfssl_stream.cpp new file mode 100644 index 0000000..8217f2c --- /dev/null +++ b/test/unit/tls/wolfssl_stream.cpp @@ -0,0 +1,29 @@ +// +// Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +// Test that header file is self-contained. +#include + +#include "test_suite.hpp" + +namespace boost { +namespace corosio { + +struct wolfssl_stream_test +{ + void + run() + { + } +}; + +TEST_SUITE(wolfssl_stream_test, "boost.corosio.wolfssl_stream"); + +} // namespace corosio +} // namespace boost