Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,6 @@ jobs:
shared: false
build-type: "Release"

- compiler: "clang"
version: "14"
cxxstd: "20"
latest-cxxstd: "20"
cxx: "clang++-14"
cc: "clang-14"
runs-on: "ubuntu-latest"
container: "ubuntu:22.04"
b2-toolset: "clang"
name: "Clang 14: C++20"
shared: true
build-type: "Release"

- compiler: "clang"
version: "20"
cxxstd: "20,23"
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/.vscode/
/.cache/
/build/*
/build-*/
!/build/Jamfile
!/build/brotli.jam
/out/
Expand Down
58 changes: 41 additions & 17 deletions bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// Official repository: https://github.com/cppalliance/capy
//

#include <boost/capy/ex/async_run.hpp>
#include <boost/capy/ex/run_async.hpp>
#include <boost/capy/ex/execution_context.hpp>
#include <boost/capy/ex/run_on.hpp>
#include <boost/capy/ex/strand.hpp>
Expand Down Expand Up @@ -67,11 +67,35 @@ class test_executor
return ctx_ == other.ctx_;
}

// Work-tracking hook: intentionally empty, so calling it has no effect
// on this benchmark executor.
void on_work_started() const noexcept {}

// Work-tracking hook: intentionally empty, so calling it has no effect
// on this benchmark executor.
void on_work_finished() const noexcept {}

// Dispatcher interface (call-operator form): performs no scheduling and
// simply hands the received handle straight back to the caller.
any_coro operator()(any_coro h) const { return h; }

// Named dispatch entry point: performs no scheduling and returns the
// received handle unchanged.
any_coro dispatch(any_coro h) const { return h; }

// Post interface: nothing is queued; the handle is resumed right here,
// inline, which is what the benchmark wants to measure.
void post(any_coro h) const { h.resume(); }

// Defer interface: nothing is queued; the handle is resumed right here,
// inline, before this call returns.
void defer(any_coro h) const { h.resume(); }
};

//-----------------------------------------------
Expand Down Expand Up @@ -243,61 +267,61 @@ int main()

foreign_awaitable foreign;

std::cout << "=== async_run depth benchmarks ===\n\n";
std::cout << "=== run_async depth benchmarks ===\n\n";

//-----------------------------------------------
// Flow: c -> (return)
//-----------------------------------------------
run_benchmark("async_run depth=1", iterations, [&]{
run_benchmark("run_async depth=1", iterations, [&]{
int result = 0;
async_run(ex1)(depth1(), [&](int v){ result = v; });
run_async(ex1, [&](int v){ result = v; })(depth1());
(void)result;
});

//-----------------------------------------------
// Flow: c1 -> c2
//-----------------------------------------------
run_benchmark("async_run depth=2", iterations, [&]{
run_benchmark("run_async depth=2", iterations, [&]{
int result = 0;
async_run(ex1)(depth2(), [&](int v){ result = v; });
run_async(ex1, [&](int v){ result = v; })(depth2());
(void)result;
});

//-----------------------------------------------
// Flow: c1 -> c2 -> c3
//-----------------------------------------------
run_benchmark("async_run depth=3", iterations, [&]{
run_benchmark("run_async depth=3", iterations, [&]{
int result = 0;
async_run(ex1)(depth3(), [&](int v){ result = v; });
run_async(ex1, [&](int v){ result = v; })(depth3());
(void)result;
});

//-----------------------------------------------
// Flow: c1 -> c2 -> c3 -> c4
//-----------------------------------------------
run_benchmark("async_run depth=4", iterations, [&]{
run_benchmark("run_async depth=4", iterations, [&]{
int result = 0;
async_run(ex1)(depth4(), [&](int v){ result = v; });
run_async(ex1, [&](int v){ result = v; })(depth4());
(void)result;
});

std::cout << "\n=== strand async_run benchmarks ===\n\n";
std::cout << "\n=== strand run_async benchmarks ===\n\n";

//-----------------------------------------------
// Flow: c -> (return) via strand
//-----------------------------------------------
run_benchmark("strand async_run depth=1", iterations, [&]{
run_benchmark("strand run_async depth=1", iterations, [&]{
int result = 0;
async_run(strand1)(depth1(), [&](int v){ result = v; });
run_async(strand1, [&](int v){ result = v; })(depth1());
(void)result;
});

//-----------------------------------------------
// Flow: c1 -> c2 -> c3 -> c4 via strand
//-----------------------------------------------
run_benchmark("strand async_run depth=4", iterations, [&]{
run_benchmark("strand run_async depth=4", iterations, [&]{
int result = 0;
async_run(strand1)(depth4(), [&](int v){ result = v; });
run_async(strand1, [&](int v){ result = v; })(depth4());
(void)result;
});

Expand All @@ -310,7 +334,7 @@ int main()
//-----------------------------------------------
run_benchmark("run_on executor switch + foreign", iterations, [&]{
int result = 0;
async_run(ex1)(switch_c1(ex2, foreign), [&](int v){ result = v; });
run_async(ex1, [&](int v){ result = v; })(switch_c1(ex2, foreign));
(void)result;
});

Expand All @@ -321,7 +345,7 @@ int main()
//-----------------------------------------------
run_benchmark("run_on strand switch + foreign", iterations, [&]{
int result = 0;
async_run(strand1)(switch_c1(strand2, foreign), [&](int v){ result = v; });
run_async(strand1, [&](int v){ result = v; })(switch_c1(strand2, foreign));
(void)result;
});

Expand Down
6 changes: 3 additions & 3 deletions context/allocator_storage_optimization.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ void deallocate_embedded(void* block, std::size_t user_size) override {
### Before (Copy)

```
Frame #2: async_run_launcher
Frame #2: run_async_launcher
┌─────────────────────────────────────┐
│ promise_type │
│ └─ embedder_ │
Expand All @@ -218,7 +218,7 @@ Total allocator storage: 144 bytes
### After (Pointer)

```
Frame #2: async_run_launcher
Frame #2: run_async_launcher
┌─────────────────────────────────────┐
│ promise_type │
│ └─ embedder_ │
Expand Down Expand Up @@ -282,7 +282,7 @@ static thread_local Allocator* g_allocator;
```

**Problems**:
- Can't support multiple concurrent async_run calls
- Can't support multiple concurrent run_async calls
- Global state is error-prone
- Doesn't work with nested or parallel tasks

Expand Down
44 changes: 22 additions & 22 deletions context/async_run_implementation_notes.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# async_run Implementation: Using Vinnie's Suspended Coroutine Launcher Pattern
# run_async Implementation: Using Vinnie's Suspended Coroutine Launcher Pattern

## Overview

The current `async_run` implementation **successfully uses** Vinnie Falco's suspended coroutine launcher pattern with **exactly two frames**, while also supporting handler-based execution that Vinnie's original design didn't address.
The current `run_async` implementation **successfully uses** Vinnie Falco's suspended coroutine launcher pattern with **exactly two frames**, while also supporting handler-based execution that Vinnie's original design didn't address.

---

Expand Down Expand Up @@ -32,7 +32,7 @@ We implement Vinnie's pattern **exactly as described**, with the addition of han
### Frame Structure (2 Frames Total)

```
Frame #2: async_run_launcher (launcher coroutine)
Frame #2: run_async_launcher (launcher coroutine)
├─ promise_type
│ ├─ d_ (dispatcher) ← Added for handler/dispatcher support
│ ├─ embedder_ (embedding_frame_allocator) ← Added for frame allocator fix
Expand All @@ -51,10 +51,10 @@ Frame #2: async_run_launcher (launcher coroutine)
### Code Structure

```cpp
// async_run() is a COROUTINE - allocates Frame #2
// run_async() is a COROUTINE - allocates Frame #2
template<dispatcher Dispatcher, frame_allocator Allocator>
detail::async_run_launcher<Dispatcher, Allocator>
async_run(Dispatcher d, Allocator alloc = {})
detail::run_async_launcher<Dispatcher, Allocator>
run_async(Dispatcher d, Allocator alloc = {})
{
// TLS set in promise constructor (line 100)
auto& promise = co_await get_promise{};
Expand All @@ -63,7 +63,7 @@ async_run(Dispatcher d, Allocator alloc = {})
```

**This follows Vinnie's pattern**:
- ✅ `async_run()` is a coroutine
- ✅ `run_async()` is a coroutine
- ✅ Allocates Frame #2 first
- ✅ `embedder_` lives in promise (line 89)
- ✅ TLS set in promise constructor (line 100)
Expand Down Expand Up @@ -143,7 +143,7 @@ inner_handle.promise().caller_ex_ = d;
This bypasses the need for a wrapper coroutine that would provide `await_transform` for dispatcher propagation. The dispatcher is simply assigned directly to the inner task's promise.

**Execution flow**:
1. `async_run(d)` creates launcher (Frame #2)
1. `run_async(d)` creates launcher (Frame #2)
2. `operator()` with handler calls `run_with_handler`
3. User task handle obtained (Frame #1 already allocated)
4. Dispatcher assigned directly to inner task
Expand Down Expand Up @@ -219,7 +219,7 @@ This also uses only 2 frames:
| Aspect | Vinnie's Pattern | Our Implementation | Match? |
|--------|------------------|-------------------|--------|
| **Frame count** | 2 frames | 2 frames | ✅ Yes |
| **`async_run()` type** | Coroutine | Coroutine | ✅ Yes |
| **`run_async()` type** | Coroutine | Coroutine | ✅ Yes |
| **Embedder location** | Launcher promise | Launcher promise | ✅ Yes |
| **TLS setup** | Promise constructor | Promise constructor | ✅ Yes |
| **`operator()` return** | Awaitable | Awaitable (+ handler overloads) | ✅ Extended |
Expand All @@ -241,7 +241,7 @@ int result = co_await launcher()(compute_value());

We added handler support:
```cpp
async_run(ex)(compute_value(), [](int result) { /* ... */ });
run_async(ex)(compute_value(), [](int result) { /* ... */ });
```

**How we achieve this with 2 frames**:
Expand All @@ -256,10 +256,10 @@ Our `launch_awaitable` supports two modes:

```cpp
// Mode 1: Fire-and-forget (destructor path)
async_run(ex)(my_task());
run_async(ex)(my_task());

// Mode 2: Awaitable (await_suspend path)
int result = co_await async_run(ex)(my_task());
int result = co_await run_async(ex)(my_task());
```

The `started_` flag tracks which path was taken, and the destructor runs fire-and-forget if not awaited.
Expand All @@ -270,16 +270,16 @@ Support for flexible handler patterns:

```cpp
// Success-only handler (exceptions rethrow)
async_run(ex)(task, [](int result) { });
run_async(ex)(task, [](int result) { });

// Full handler with exception handling
async_run(ex)(task, overload{
run_async(ex)(task, overload{
[](int result) { },
[](std::exception_ptr) { }
});

// Separate success/error handlers
async_run(ex)(task,
run_async(ex)(task,
[](int result) { },
[](std::exception_ptr) { });
```
Expand All @@ -293,8 +293,8 @@ The `handler_pair` utility combines handlers, and `default_handler` provides def
### Frame Allocation Timeline

```
1. async_run(ex) called
└─> async_run() coroutine starts
1. run_async(ex) called
└─> run_async() coroutine starts
└─> Allocates Frame #2 (launcher)
└─> promise_type constructor runs
└─> embedder_ initialized
Expand Down Expand Up @@ -355,7 +355,7 @@ The confusion arose because I was looking at an older version or misread the cod
The allocator is stored **only** in Frame #2's promise, inside the `embedder_`:

```cpp
// Frame #2: async_run_launcher
// Frame #2: run_async_launcher
struct promise_type {
std::optional<Dispatcher> d_;
detail::embedding_frame_allocator<Allocator> embedder_; // Contains the allocator
Expand Down Expand Up @@ -398,7 +398,7 @@ class frame_allocator_wrapper {

```
┌─────────────────────────────────────────────────────────────┐
│ Frame #2: async_run_launcher (heap) │
│ Frame #2: run_async_launcher (heap) │
├─────────────────────────────────────────────────────────────┤
│ promise_type: │
│ ├─ d_: Dispatcher │
Expand Down Expand Up @@ -437,7 +437,7 @@ Since the wrapper (Frame #1) is destroyed first, the pointer to `embedder_.alloc
### Construction Flow

```cpp
// 1. async_run(d, alloc) called
// 1. run_async(d, alloc) called
// Frame #2 allocated, promise_type constructor runs:
promise_type(Dispatcher d, Allocator a)
: embedder_(std::move(a)) // Allocator moved into embedder_
Expand Down Expand Up @@ -617,12 +617,12 @@ co_await launcher()(user_task());

### Comparison to Our Implementation

Our `async_run` implementation follows Vinnie's pattern with these additions:
Our `run_async` implementation follows Vinnie's pattern with these additions:

| Component | Vinnie's Pattern | Our Implementation |
|-----------|------------------|-------------------|
| **Frame allocation** | 2 frames (launcher + user task) | 2 frames (launcher + user task) ✅ |
| **Launcher coroutine** | Suspends immediately, transfers to inner | `async_run_launcher` - same approach ✅ |
| **Launcher coroutine** | Suspends immediately, transfers to inner | `run_async_launcher` - same approach ✅ |
| **Non-coroutine operator()** | Returns awaitable without frame | `launch_awaitable` - same approach ✅ |
| **Frame allocator** | Not specified | Added `embedder_` for allocator lifetime |
| **Handler support** | Not specified | Added `d_` and handler-based execution |
Expand Down
4 changes: 2 additions & 2 deletions doc/modules/ROOT/pages/coroutines/affinity.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ You establish affinity when launching a task:

[source,cpp]
----
async_run(ex)(my_task()); // my_task has affinity to ex
run_async(ex)(my_task()); // my_task has affinity to ex
----

== How Affinity Propagates
Expand All @@ -46,7 +46,7 @@ affinity awaits a child task, the child inherits the same affinity:

[source,cpp]
----
task<void> parent() // affinity: ex (from async_run)
task<void> parent() // affinity: ex (from run_async)
{
co_await child(); // child inherits ex
}
Expand Down
Loading
Loading