From a03a5bd8a31c4de79baf5e14fee36e5718833466 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 20 Mar 2026 17:25:03 +0000 Subject: [PATCH 01/27] feat: add failing test stubs for events package --- server/lib/events/events_test.go | 474 +++++++++++++++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 server/lib/events/events_test.go diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go new file mode 100644 index 00000000..30cd5528 --- /dev/null +++ b/server/lib/events/events_test.go @@ -0,0 +1,474 @@ +package events + +import ( + "bytes" + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestBrowserEvent: construct BrowserEvent with all SCHEMA-01 fields; marshal to JSON; +// assert all snake_case keys present. +func TestBrowserEvent(t *testing.T) { + ev := BrowserEvent{ + CaptureSessionID: "test-session-id", + Seq: 1, + Ts: 1234567890000, + Type: "console_log", + TargetID: "target-1", + CDPSessionID: "cdp-session-1", + FrameID: "frame-1", + ParentFrameID: "parent-frame-1", + URL: "https://example.com", + Data: json.RawMessage(`{"message":"hello"}`), + Truncated: false, + } + + b, err := json.Marshal(ev) + require.NoError(t, err) + + s := string(b) + assert.Contains(t, s, `"capture_session_id"`) + assert.Contains(t, s, `"seq"`) + assert.Contains(t, s, `"ts"`) + assert.Contains(t, s, `"type"`) + assert.Contains(t, s, `"target_id"`) + assert.Contains(t, s, `"cdp_session_id"`) + assert.Contains(t, s, `"frame_id"`) + assert.Contains(t, s, `"parent_frame_id"`) + assert.Contains(t, s, `"url"`) + assert.Contains(t, s, `"data"`) +} + +// TestBrowserEventData: embed a pre-serialized JSON object in Data field; marshal outer event; +// assert Data appears verbatim (no double-encoding). 
+func TestBrowserEventData(t *testing.T) { + rawData := json.RawMessage(`{"key":"value","num":42}`) + ev := BrowserEvent{ + CaptureSessionID: "test-session", + Seq: 1, + Ts: 1000, + Type: "cdp_event", + Data: rawData, + } + + b, err := json.Marshal(ev) + require.NoError(t, err) + + s := string(b) + // Data must appear verbatim — no double-encoding (should not be escaped string) + assert.Contains(t, s, `"data":{"key":"value","num":42}`) + assert.NotContains(t, s, `"data":"{`) // would indicate double-encoding +} + +// TestCategoryFor: table-driven; assert prefix routing is correct. +func TestCategoryFor(t *testing.T) { + cases := []struct { + eventType string + expected EventCategory + }{ + {"console_log", CategoryConsole}, + {"network_request", CategoryNetwork}, + {"liveview_click", CategoryLiveview}, + {"captcha_solve", CategoryCaptcha}, + {"cdp_nav", CategoryCDP}, + {"unknown_type", CategoryCDP}, + } + + for _, tc := range cases { + t.Run(tc.eventType, func(t *testing.T) { + got := CategoryFor(tc.eventType) + assert.Equal(t, tc.expected, got) + }) + } +} + +// TestRingBuffer: publish 3 events; reader reads all 3 in order. +func TestRingBuffer(t *testing.T) { + rb := NewRingBuffer(10) + reader := rb.NewReader() + + events := []BrowserEvent{ + {Seq: 1, Type: "cdp_event_1"}, + {Seq: 2, Type: "cdp_event_2"}, + {Seq: 3, Type: "cdp_event_3"}, + } + + for _, ev := range events { + rb.Publish(ev) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + for i, expected := range events { + got, err := reader.Read(ctx) + require.NoError(t, err, "reading event %d", i) + assert.Equal(t, expected.Type, got.Type) + } +} + +// TestRingBufferOverflow: ring capacity 2; publish 3 events with no reader; +// assert write returns immediately (no block); reader receives events_dropped then newest events. 
+func TestRingBufferOverflow(t *testing.T) { + rb := NewRingBuffer(2) + + // Publish 3 events with no reader — must not block + done := make(chan struct{}) + go func() { + rb.Publish(BrowserEvent{Seq: 1, Type: "cdp_event_1"}) + rb.Publish(BrowserEvent{Seq: 2, Type: "cdp_event_2"}) + rb.Publish(BrowserEvent{Seq: 3, Type: "cdp_event_3"}) + close(done) + }() + + select { + case <-done: + // good — did not block + case <-time.After(500 * time.Millisecond): + t.Fatal("Publish blocked with no readers") + } + + // Create reader after overflow; should get events_dropped then available events + reader := rb.NewReader() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + first, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, "events_dropped", first.Type) +} + +// TestEventsDropped: ring capacity 2; reader gets notify channel; publish 3 events; +// reader reads; assert first result is events_dropped BrowserEvent. +func TestEventsDropped(t *testing.T) { + rb := NewRingBuffer(2) + reader := rb.NewReader() + + // Publish 3 events, overflowing the ring (capacity 2) + rb.Publish(BrowserEvent{Seq: 1, Type: "cdp_event_1"}) + rb.Publish(BrowserEvent{Seq: 2, Type: "cdp_event_2"}) + rb.Publish(BrowserEvent{Seq: 3, Type: "cdp_event_3"}) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + first, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, "events_dropped", first.Type) + + // Data must be valid JSON with a "dropped" count + require.NotNil(t, first.Data) + assert.True(t, json.Valid(first.Data)) + assert.Contains(t, string(first.Data), `"dropped"`) +} + +// TestConcurrentReaders: 3 readers subscribe before publish; publish 5 events; +// each reader independently reads all 5; no reader affects another. 
+func TestConcurrentReaders(t *testing.T) { + rb := NewRingBuffer(20) + + numReaders := 3 + numEvents := 5 + + readers := make([]*Reader, numReaders) + for i := range readers { + readers[i] = rb.NewReader() + } + + // Publish events after readers are created + for i := 0; i < numEvents; i++ { + rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "cdp_event"}) + } + + var wg sync.WaitGroup + results := make([][]BrowserEvent, numReaders) + + for i, r := range readers { + wg.Add(1) + go func(idx int, reader *Reader) { + defer wg.Done() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + var evs []BrowserEvent + for j := 0; j < numEvents; j++ { + ev, err := reader.Read(ctx) + require.NoError(t, err) + evs = append(evs, ev) + } + results[idx] = evs + }(i, r) + } + + wg.Wait() + + // Each reader must have received all 5 events + for i, evs := range results { + assert.Len(t, evs, numEvents, "reader %d", i) + for j, ev := range evs { + assert.Equal(t, uint64(j+1), ev.Seq, "reader %d event %d", i, j) + } + } +} + +// TestFileWriter: per-category JSONL appender tests. 
+func TestFileWriter(t *testing.T) { + t.Run("writes_to_correct_file", func(t *testing.T) { + dir := t.TempDir() + fw := NewFileWriter(dir) + defer fw.Close() + + ev := BrowserEvent{ + CaptureSessionID: "sess-1", + Seq: 1, + Ts: 1000, + Type: "console_log", + Data: json.RawMessage(`{"message":"hello"}`), + } + require.NoError(t, fw.Write(ev)) + + data, err := os.ReadFile(filepath.Join(dir, "console.log")) + require.NoError(t, err) + + lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") + require.Len(t, lines, 1) + assert.True(t, json.Valid([]byte(lines[0]))) + assert.Contains(t, lines[0], `"capture_session_id"`) + assert.Contains(t, lines[0], `"console_log"`) + }) + + t.Run("category_routing", func(t *testing.T) { + dir := t.TempDir() + fw := NewFileWriter(dir) + defer fw.Close() + + typeToFile := map[string]string{ + "console_log": "console.log", + "network_request": "network.log", + "liveview_click": "liveview.log", + "captcha_solve": "captcha.log", + "cdp_navigation": "cdp.log", + } + + for typ := range typeToFile { + require.NoError(t, fw.Write(BrowserEvent{Type: typ, Seq: 1, Ts: 1})) + } + + for typ, file := range typeToFile { + data, err := os.ReadFile(filepath.Join(dir, file)) + require.NoError(t, err, "missing file for type %s", typ) + assert.True(t, json.Valid(bytes.TrimRight(data, "\n"))) + } + }) + + t.Run("concurrent_writes", func(t *testing.T) { + dir := t.TempDir() + fw := NewFileWriter(dir) + defer fw.Close() + + const goroutines = 10 + const eventsPerGoroutine = 100 + + var wg sync.WaitGroup + for i := 0; i < goroutines; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + for j := 0; j < eventsPerGoroutine; j++ { + ev := BrowserEvent{ + Seq: uint64(i*eventsPerGoroutine + j), + Type: "console_log", + Ts: 1, + } + require.NoError(t, fw.Write(ev)) + } + }(i) + } + wg.Wait() + + data, err := os.ReadFile(filepath.Join(dir, "console.log")) + require.NoError(t, err) + + lines := strings.Split(strings.TrimRight(string(data), "\n"), 
"\n") + assert.Len(t, lines, goroutines*eventsPerGoroutine) + for _, line := range lines { + assert.True(t, json.Valid([]byte(line)), "invalid JSON line: %s", line) + } + }) + + t.Run("lazy_open", func(t *testing.T) { + dir := t.TempDir() + fw := NewFileWriter(dir) + defer fw.Close() + + // No writes yet — directory should be empty. + entries, err := os.ReadDir(dir) + require.NoError(t, err) + assert.Empty(t, entries, "files opened before first Write") + + require.NoError(t, fw.Write(BrowserEvent{Type: "console_log", Seq: 1, Ts: 1})) + + entries, err = os.ReadDir(dir) + require.NoError(t, err) + assert.Len(t, entries, 1, "expected exactly one file after first Write") + assert.Equal(t, "console.log", entries[0].Name()) + }) +} + +// TestPipeline: Pipeline glue type tests. +func TestPipeline(t *testing.T) { + newPipeline := func(t *testing.T) (*Pipeline, string) { + t.Helper() + dir := t.TempDir() + rb := NewRingBuffer(100) + fw := NewFileWriter(dir) + p := NewPipeline(rb, fw) + t.Cleanup(func() { p.Close() }) + return p, dir + } + + t.Run("publish_increments_seq", func(t *testing.T) { + p, _ := newPipeline(t) + reader := p.NewReader() + + for i := 0; i < 3; i++ { + p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + for want := uint64(1); want <= 3; want++ { + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, want, ev.Seq, "expected seq %d got %d", want, ev.Seq) + } + }) + + t.Run("publish_sets_ts", func(t *testing.T) { + p, _ := newPipeline(t) + reader := p.NewReader() + + before := time.Now().UnixMilli() + p.Publish(BrowserEvent{Type: "cdp_event"}) // Ts == 0 + after := time.Now().UnixMilli() + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.GreaterOrEqual(t, ev.Ts, before) + assert.LessOrEqual(t, ev.Ts, after) + }) + + 
t.Run("publish_writes_file", func(t *testing.T) { + p, dir := newPipeline(t) + + p.Publish(BrowserEvent{Type: "console_log", Ts: 1}) + + data, err := os.ReadFile(filepath.Join(dir, "console.log")) + require.NoError(t, err) + + lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") + require.Len(t, lines, 1) + assert.True(t, json.Valid([]byte(lines[0]))) + assert.Contains(t, lines[0], `"console_log"`) + }) + + t.Run("publish_writes_ring", func(t *testing.T) { + p, _ := newPipeline(t) + + // Subscribe reader BEFORE publish. + reader := p.NewReader() + p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, "cdp_event", ev.Type) + }) + + t.Run("start_sets_capture_session_id", func(t *testing.T) { + p, _ := newPipeline(t) + p.Start("test-uuid") + + reader := p.NewReader() + p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, "test-uuid", ev.CaptureSessionID) + }) + + t.Run("truncation_applied", func(t *testing.T) { + p, dir := newPipeline(t) + reader := p.NewReader() + + largeData := strings.Repeat("x", 1_100_000) + rawData, err := json.Marshal(map[string]string{"payload": largeData}) + require.NoError(t, err) + + p.Publish(BrowserEvent{ + Type: "cdp_event", + Ts: 1, + Data: json.RawMessage(rawData), + }) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + // Ring buffer event must have Truncated==true. + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.True(t, ev.Truncated) + + // File must contain valid JSON with truncated==true. 
+ data, err := os.ReadFile(filepath.Join(dir, "cdp.log")) + require.NoError(t, err) + lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") + require.Len(t, lines, 1) + assert.True(t, json.Valid([]byte(lines[0]))) + assert.Contains(t, lines[0], `"truncated":true`) + }) +} + +// TestTruncation: construct event with Data = 1.1MB JSON bytes; call truncateIfNeeded; +// assert Truncated==true and json.Valid(result.Data)==true and len(marshal(result)) <= 1_000_000. +func TestTruncation(t *testing.T) { + // Build a Data field that is ~1.1MB + largeData := strings.Repeat("x", 1_100_000) + rawData, err := json.Marshal(map[string]string{"payload": largeData}) + require.NoError(t, err) + + ev := BrowserEvent{ + CaptureSessionID: "test-session", + Seq: 1, + Ts: 1000, + Type: "cdp_event", + Data: json.RawMessage(rawData), + } + + result := truncateIfNeeded(ev) + + assert.True(t, result.Truncated) + assert.True(t, json.Valid(result.Data)) + + marshaled, err := json.Marshal(result) + require.NoError(t, err) + assert.LessOrEqual(t, len(marshaled), 1_000_000) +} From 42f415f52064a49ac807503f985b1f873be0f8a6 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 20 Mar 2026 17:25:09 +0000 Subject: [PATCH 02/27] feat: add BrowserEvent struct, CategoryFor, and truncateIfNeeded --- server/lib/events/event.go | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 server/lib/events/event.go diff --git a/server/lib/events/event.go b/server/lib/events/event.go new file mode 100644 index 00000000..eb9c5674 --- /dev/null +++ b/server/lib/events/event.go @@ -0,0 +1,68 @@ +package events + +import ( + "encoding/json" + "strings" +) + +// maxS2RecordBytes is the S2 record size limit (SCHEMA-04). +const maxS2RecordBytes = 1_000_000 + +// EventCategory maps event type prefixes to log file names. 
+type EventCategory string + +const ( + CategoryCDP EventCategory = "cdp" + CategoryConsole EventCategory = "console" + CategoryNetwork EventCategory = "network" + CategoryLiveview EventCategory = "liveview" + CategoryCaptcha EventCategory = "captcha" +) + +// BrowserEvent is the canonical event structure for the browser capture pipeline. +type BrowserEvent struct { + CaptureSessionID string `json:"capture_session_id"` + Seq uint64 `json:"seq"` + Ts int64 `json:"ts"` + Type string `json:"type"` + TargetID string `json:"target_id,omitempty"` + CDPSessionID string `json:"cdp_session_id,omitempty"` + FrameID string `json:"frame_id,omitempty"` + ParentFrameID string `json:"parent_frame_id,omitempty"` + URL string `json:"url,omitempty"` + Data json.RawMessage `json:"data,omitempty"` + Truncated bool `json:"truncated,omitempty"` +} + +// CategoryFor returns the log category for a given event type. +// Event types follow the pattern "_", e.g. "console_log", +// "network_request", "cdp_navigation". Types not matching a known prefix +// fall through to CategoryCDP as a safe default. +func CategoryFor(eventType string) EventCategory { + prefix, _, _ := strings.Cut(eventType, "_") + switch prefix { + case "console": + return CategoryConsole + case "network": + return CategoryNetwork + case "liveview": + return CategoryLiveview + case "captcha": + return CategoryCaptcha + default: + return CategoryCDP + } +} + +// truncateIfNeeded returns a copy of ev with Data replaced with json.RawMessage("null") +// and Truncated set to true if the marshaled size exceeds maxS2RecordBytes. +// Per RESEARCH pitfall 3: never attempt byte-slice truncation of the Data field. 
+func truncateIfNeeded(ev BrowserEvent) BrowserEvent { + candidate, err := json.Marshal(ev) + if err != nil || len(candidate) <= maxS2RecordBytes { + return ev + } + ev.Data = json.RawMessage("null") + ev.Truncated = true + return ev +} From fa67dfff38682d44e2ec6c3d1dcce0186e489f7e Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 20 Mar 2026 17:25:11 +0000 Subject: [PATCH 03/27] feat: add RingBuffer with closed-channel broadcast fan-out --- server/lib/events/ringbuffer.go | 110 ++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 server/lib/events/ringbuffer.go diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go new file mode 100644 index 00000000..3911912e --- /dev/null +++ b/server/lib/events/ringbuffer.go @@ -0,0 +1,110 @@ +package events + +import ( + "context" + "encoding/json" + "fmt" + "sync" +) + +// RingBuffer is a fixed-capacity circular buffer with closed-channel broadcast fan-out. +// Writers never block regardless of reader count or speed. +// Readers track their position by seq value (not ring index) and receive an +// events_dropped synthetic BrowserEvent when they fall behind the oldest retained event. +type RingBuffer struct { + mu sync.RWMutex + buf []BrowserEvent + head int // next write position (mod cap) + count int // items currently stored (0..cap) + written uint64 // total ever published (monotonic) + notify chan struct{} +} + +// NewRingBuffer creates a new RingBuffer with the given capacity. +func NewRingBuffer(capacity int) *RingBuffer { + return &RingBuffer{ + buf: make([]BrowserEvent, capacity), + notify: make(chan struct{}), + } +} + +// Publish adds an event to the ring buffer, evicting the oldest entry on overflow. +// Closes the current notify channel (waking all waiting readers) and replaces it +// with a new one — outside the lock to avoid blocking under contention. 
+func (rb *RingBuffer) Publish(ev BrowserEvent) { + rb.mu.Lock() + rb.buf[rb.head] = ev + rb.head = (rb.head + 1) % len(rb.buf) + if rb.count < len(rb.buf) { + rb.count++ + } + rb.written++ + old := rb.notify + rb.notify = make(chan struct{}) + rb.mu.Unlock() + close(old) // outside lock to avoid blocking under contention +} + +// oldestSeq returns the seq of the oldest event still in the ring. +// Must be called under at least a read lock. +func (rb *RingBuffer) oldestSeq() uint64 { + if rb.written <= uint64(len(rb.buf)) { + return 0 + } + return rb.written - uint64(len(rb.buf)) +} + +// NewReader returns a Reader positioned at seq 0. +// If the ring has already published events, the reader will receive an +// events_dropped BrowserEvent on the first Read call if it has fallen behind +// the oldest retained event. +func (rb *RingBuffer) NewReader() *Reader { + return &Reader{rb: rb, nextSeq: 0} +} + +// Reader tracks an independent read position in a RingBuffer. +type Reader struct { + rb *RingBuffer + nextSeq uint64 +} + +// Read blocks until the next event is available or ctx is cancelled. +// Returns (event, nil) for a normal event. +// Returns (events_dropped BrowserEvent, nil) if the reader has fallen behind +// the ring's oldest retained event — the dropped count is in Data as valid JSON. +func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { + for { + r.rb.mu.RLock() + notify := r.rb.notify + oldest := r.rb.oldestSeq() + written := r.rb.written + + // Reader fell behind — synthesize events_dropped before advancing. + if r.nextSeq < oldest { + dropped := oldest - r.nextSeq + r.nextSeq = oldest + r.rb.mu.RUnlock() + data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) + return BrowserEvent{Type: "events_dropped", Data: data}, nil + } + + // Event is available — read it. 
+ if r.nextSeq < written { + idx := int(r.nextSeq % uint64(len(r.rb.buf))) + ev := r.rb.buf[idx] + r.nextSeq++ + r.rb.mu.RUnlock() + return ev, nil + } + + // No event yet — wait for notification. + r.rb.mu.RUnlock() + + select { + case <-ctx.Done(): + return BrowserEvent{}, ctx.Err() + case <-notify: + // new event available; loop to read it + } + } +} From 115c7209cbb5645a08e938efd97463747db06cfa Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 20 Mar 2026 17:25:16 +0000 Subject: [PATCH 04/27] feat: add FileWriter per-category JSONL appender --- server/lib/events/filewriter.go | 78 +++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 server/lib/events/filewriter.go diff --git a/server/lib/events/filewriter.go b/server/lib/events/filewriter.go new file mode 100644 index 00000000..4b40d204 --- /dev/null +++ b/server/lib/events/filewriter.go @@ -0,0 +1,78 @@ +package events + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" +) + +// FileWriter is a per-category JSONL appender. It opens each log file lazily on +// first write (O_APPEND|O_CREATE|O_WRONLY) and serialises concurrent writes +// within a category with a single mutex. +type FileWriter struct { + mu sync.Mutex + files map[EventCategory]*os.File + dir string +} + +// NewFileWriter returns a FileWriter that writes to dir. +// No files are opened until the first Write call. +func NewFileWriter(dir string) *FileWriter { + return &FileWriter{dir: dir, files: make(map[EventCategory]*os.File)} +} + +// Write serialises ev to JSON and appends it as a single JSONL line to the +// per-category log file. The mutex is held for the entire open+marshal+write +// sequence to prevent TOCTOU races and to guarantee whole-line atomicity for +// events larger than PIPE_BUF. +func (fw *FileWriter) Write(ev BrowserEvent) error { + cat := CategoryFor(ev.Type) + + fw.mu.Lock() + defer fw.mu.Unlock() + + // Lazy open. 
+ f, ok := fw.files[cat] + if !ok { + path := filepath.Join(fw.dir, string(cat)+".log") + var err error + f, err = os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("filewriter: open %s: %w", path, err) + } + fw.files[cat] = f + } + + data, err := json.Marshal(ev) + if err != nil { + return fmt.Errorf("filewriter: marshal: %w", err) + } + + var buf bytes.Buffer + buf.Write(data) + buf.WriteByte('\n') + + if _, err := f.Write(buf.Bytes()); err != nil { + return fmt.Errorf("filewriter: write: %w", err) + } + + return nil +} + +// Close closes all open log file descriptors. The first encountered error is +// returned; subsequent files are still closed. +func (fw *FileWriter) Close() error { + fw.mu.Lock() + defer fw.mu.Unlock() + + var firstErr error + for _, f := range fw.files { + if err := f.Close(); err != nil && firstErr == nil { + firstErr = err + } + } + return firstErr +} From f07e40d2fce59b7539e53778e5401f3cb476a6dc Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 20 Mar 2026 17:25:20 +0000 Subject: [PATCH 05/27] feat: add Pipeline glue type sequencing truncation, file write, and ring publish --- server/lib/events/pipeline.go | 67 +++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 server/lib/events/pipeline.go diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go new file mode 100644 index 00000000..11661150 --- /dev/null +++ b/server/lib/events/pipeline.go @@ -0,0 +1,67 @@ +package events + +import ( + "sync/atomic" + "time" +) + +// Pipeline glues a RingBuffer and a FileWriter into a single write path. +// A single call to Publish stamps the event with a monotonic sequence number, +// applies truncation, durably appends it to the per-category log file, and +// then makes it available to ring buffer readers. 
+type Pipeline struct { + ring *RingBuffer + files *FileWriter + seq atomic.Uint64 + captureSessionID atomic.Value // stores string +} + +// NewPipeline returns a Pipeline backed by the supplied ring and file writer. +func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { + p := &Pipeline{ring: ring, files: files} + p.captureSessionID.Store("") + return p +} + +// Start sets the capture session ID that will be stamped on every subsequent +// published event. It may be called at any time; the change is immediately +// visible to concurrent Publish calls. +func (p *Pipeline) Start(captureSessionID string) { + p.captureSessionID.Store(captureSessionID) +} + +// Publish stamps, truncates, files, and broadcasts a single event. +// +// Ordering: +// 1. Stamp CaptureSessionID, Seq, Ts (Ts only if caller left it zero) +// 2. Apply truncateIfNeeded (SCHEMA-04) — must happen before both sinks +// 3. Write to FileWriter (durable before in-memory) +// 4. Publish to RingBuffer (in-memory fan-out) +// +// Errors from FileWriter.Write are silently dropped; the ring buffer always +// receives the event even if the file write fails. +func (p *Pipeline) Publish(ev BrowserEvent) { + ev.CaptureSessionID = p.captureSessionID.Load().(string) + ev.Seq = p.seq.Add(1) // starts at 1 + if ev.Ts == 0 { + ev.Ts = time.Now().UnixMilli() + } + ev = truncateIfNeeded(ev) + + // File write first — durable before in-memory. + _ = p.files.Write(ev) + + // Ring buffer last — readers see the event after the file is written. + p.ring.Publish(ev) +} + +// NewReader returns a Reader positioned at the start of the ring buffer. +func (p *Pipeline) NewReader() *Reader { + return p.ring.NewReader() +} + +// Close closes the underlying FileWriter, flushing and releasing all open +// file descriptors. 
+func (p *Pipeline) Close() error { + return p.files.Close() +} From 18fdb6d0f7476f2893edc5b4ed34f51039e9bdfb Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 27 Mar 2026 11:37:14 +0000 Subject: [PATCH 06/27] review: fix truncateIfNeeded branch split, atomic.Pointer[string], Reader godoc, and test correctness --- server/lib/events/event.go | 82 +++++--- server/lib/events/events_test.go | 330 ++++++++++++++++++------------- server/lib/events/pipeline.go | 15 +- 3 files changed, 252 insertions(+), 175 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index eb9c5674..b174147e 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -2,29 +2,66 @@ package events import ( "encoding/json" - "strings" ) -// maxS2RecordBytes is the S2 record size limit (SCHEMA-04). +// maxS2RecordBytes is the S2 event pipeline maximum record size (1 MB). +// Events exceeding this limit have their Data field replaced with null and +// Truncated set to true before being written to the file and ring sinks. const maxS2RecordBytes = 1_000_000 -// EventCategory maps event type prefixes to log file names. +// EventCategory is a first-class envelope field that determines log file routing. type EventCategory string const ( - CategoryCDP EventCategory = "cdp" - CategoryConsole EventCategory = "console" - CategoryNetwork EventCategory = "network" - CategoryLiveview EventCategory = "liveview" - CategoryCaptcha EventCategory = "captcha" + CategoryConsole EventCategory = "console" + CategoryNetwork EventCategory = "network" + CategoryPage EventCategory = "page" + CategoryInteraction EventCategory = "interaction" + CategoryLiveview EventCategory = "liveview" + CategoryCaptcha EventCategory = "captcha" + CategorySystem EventCategory = "system" +) + +// SourceKind identifies the provenance of an event — which subsystem produced it. 
+type SourceKind string + +const ( + SourceCDP SourceKind = "cdp" + SourceKernelAPI SourceKind = "kernel_api" + SourceExtension SourceKind = "extension" + SourceLocalProcess SourceKind = "local_process" +) + +// DetailLevel controls the verbosity of the event payload. +type DetailLevel string + +const ( + DetailMinimal DetailLevel = "minimal" + DetailDefault DetailLevel = "default" + DetailVerbose DetailLevel = "verbose" + DetailRaw DetailLevel = "raw" ) // BrowserEvent is the canonical event structure for the browser capture pipeline. +// +// The envelope is designed so that capture config and subscription selectors +// can operate on stable, first-class fields (Category, SourceKind, DetailLevel) +// without parsing the Type string. Type carries semantic identity (e.g. +// "console.log", "network.request"); SourceEvent carries the raw upstream +// event name (e.g. "Runtime.consoleAPICalled") for diagnostics. +// +// DetailLevel is always serialised (no omitempty). Pipeline.Publish defaults it +// to DetailDefault; callers constructing events outside a Pipeline should set it +// explicitly. type BrowserEvent struct { CaptureSessionID string `json:"capture_session_id"` Seq uint64 `json:"seq"` Ts int64 `json:"ts"` Type string `json:"type"` + Category EventCategory `json:"category"` + SourceKind SourceKind `json:"source_kind"` + SourceEvent string `json:"source_event,omitempty"` + DetailLevel DetailLevel `json:"detail_level"` TargetID string `json:"target_id,omitempty"` CDPSessionID string `json:"cdp_session_id,omitempty"` FrameID string `json:"frame_id,omitempty"` @@ -34,32 +71,17 @@ type BrowserEvent struct { Truncated bool `json:"truncated,omitempty"` } -// CategoryFor returns the log category for a given event type. -// Event types follow the pattern "_", e.g. "console_log", -// "network_request", "cdp_navigation". Types not matching a known prefix -// fall through to CategoryCDP as a safe default. 
-func CategoryFor(eventType string) EventCategory { - prefix, _, _ := strings.Cut(eventType, "_") - switch prefix { - case "console": - return CategoryConsole - case "network": - return CategoryNetwork - case "liveview": - return CategoryLiveview - case "captcha": - return CategoryCaptcha - default: - return CategoryCDP - } -} - // truncateIfNeeded returns a copy of ev with Data replaced with json.RawMessage("null") // and Truncated set to true if the marshaled size exceeds maxS2RecordBytes. -// Per RESEARCH pitfall 3: never attempt byte-slice truncation of the Data field. +// Never attempt byte-slice truncation of the Data field — partial JSON is invalid. func truncateIfNeeded(ev BrowserEvent) BrowserEvent { candidate, err := json.Marshal(ev) - if err != nil || len(candidate) <= maxS2RecordBytes { + if err != nil { + // Marshal should never fail for BrowserEvent (all fields are JSON-safe), + // but if it does return ev unchanged rather than silently nulling Data. + return ev + } + if len(candidate) <= maxS2RecordBytes { return ev } ev.Data = json.RawMessage("null") diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index 30cd5528..deb390c0 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -15,37 +15,44 @@ import ( "github.com/stretchr/testify/require" ) -// TestBrowserEvent: construct BrowserEvent with all SCHEMA-01 fields; marshal to JSON; -// assert all snake_case keys present. -func TestBrowserEvent(t *testing.T) { +// TestBrowserEventSerialization: round-trip marshal/unmarshal verifying all SCHEMA-01 +// envelope fields serialize with correct JSON keys and values, including provenance. 
+func TestBrowserEventSerialization(t *testing.T) { ev := BrowserEvent{ CaptureSessionID: "test-session-id", Seq: 1, Ts: 1234567890000, - Type: "console_log", + Type: "console.log", + Category: CategoryConsole, + SourceKind: SourceCDP, + SourceEvent: "Runtime.consoleAPICalled", + DetailLevel: DetailDefault, TargetID: "target-1", CDPSessionID: "cdp-session-1", FrameID: "frame-1", ParentFrameID: "parent-frame-1", URL: "https://example.com", Data: json.RawMessage(`{"message":"hello"}`), - Truncated: false, } b, err := json.Marshal(ev) require.NoError(t, err) - s := string(b) - assert.Contains(t, s, `"capture_session_id"`) - assert.Contains(t, s, `"seq"`) - assert.Contains(t, s, `"ts"`) - assert.Contains(t, s, `"type"`) - assert.Contains(t, s, `"target_id"`) - assert.Contains(t, s, `"cdp_session_id"`) - assert.Contains(t, s, `"frame_id"`) - assert.Contains(t, s, `"parent_frame_id"`) - assert.Contains(t, s, `"url"`) - assert.Contains(t, s, `"data"`) + var decoded map[string]any + require.NoError(t, json.Unmarshal(b, &decoded)) + + assert.Equal(t, "console.log", decoded["type"]) + assert.Equal(t, "console", decoded["category"]) + assert.Equal(t, "cdp", decoded["source_kind"]) + assert.Equal(t, "Runtime.consoleAPICalled", decoded["source_event"]) + assert.Equal(t, "default", decoded["detail_level"]) + assert.Equal(t, "test-session-id", decoded["capture_session_id"]) + assert.Equal(t, float64(1), decoded["seq"]) + assert.Equal(t, "target-1", decoded["target_id"]) + assert.Equal(t, "cdp-session-1", decoded["cdp_session_id"]) + assert.Equal(t, "frame-1", decoded["frame_id"]) + assert.Equal(t, "parent-frame-1", decoded["parent_frame_id"]) + assert.Equal(t, "https://example.com", decoded["url"]) } // TestBrowserEventData: embed a pre-serialized JSON object in Data field; marshal outer event; @@ -56,7 +63,9 @@ func TestBrowserEventData(t *testing.T) { CaptureSessionID: "test-session", Seq: 1, Ts: 1000, - Type: "cdp_event", + Type: "page.navigation", + Category: CategoryPage, + 
SourceKind: SourceCDP, Data: rawData, } @@ -64,31 +73,28 @@ func TestBrowserEventData(t *testing.T) { require.NoError(t, err) s := string(b) - // Data must appear verbatim — no double-encoding (should not be escaped string) assert.Contains(t, s, `"data":{"key":"value","num":42}`) assert.NotContains(t, s, `"data":"{`) // would indicate double-encoding } -// TestCategoryFor: table-driven; assert prefix routing is correct. -func TestCategoryFor(t *testing.T) { - cases := []struct { - eventType string - expected EventCategory - }{ - {"console_log", CategoryConsole}, - {"network_request", CategoryNetwork}, - {"liveview_click", CategoryLiveview}, - {"captcha_solve", CategoryCaptcha}, - {"cdp_nav", CategoryCDP}, - {"unknown_type", CategoryCDP}, +// TestBrowserEventOmitEmpty: source_event is omitted when empty; detail_level always present. +func TestBrowserEventOmitEmpty(t *testing.T) { + ev := BrowserEvent{ + CaptureSessionID: "sess", + Seq: 1, + Ts: 1000, + Type: "console.log", + Category: CategoryConsole, + SourceKind: SourceCDP, } - for _, tc := range cases { - t.Run(tc.eventType, func(t *testing.T) { - got := CategoryFor(tc.eventType) - assert.Equal(t, tc.expected, got) - }) - } + b, err := json.Marshal(ev) + require.NoError(t, err) + + s := string(b) + assert.NotContains(t, s, `"source_event"`) + // detail_level is always serialized (not omitempty) — zero value is "" + assert.Contains(t, s, `"detail_level"`) } // TestRingBuffer: publish 3 events; reader reads all 3 in order. 
@@ -97,9 +103,9 @@ func TestRingBuffer(t *testing.T) { reader := rb.NewReader() events := []BrowserEvent{ - {Seq: 1, Type: "cdp_event_1"}, - {Seq: 2, Type: "cdp_event_2"}, - {Seq: 3, Type: "cdp_event_3"}, + {Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}, + {Seq: 2, Type: "network.request", Category: CategoryNetwork, SourceKind: SourceCDP}, + {Seq: 3, Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP}, } for _, ev := range events { @@ -113,62 +119,113 @@ func TestRingBuffer(t *testing.T) { got, err := reader.Read(ctx) require.NoError(t, err, "reading event %d", i) assert.Equal(t, expected.Type, got.Type) + assert.Equal(t, expected.Category, got.Category) } } -// TestRingBufferOverflow: ring capacity 2; publish 3 events with no reader; -// assert write returns immediately (no block); reader receives events_dropped then newest events. -func TestRingBufferOverflow(t *testing.T) { +// TestRingBufferOverflowNoBlock: writer never blocks even with no readers; +// late-joining reader gets events.dropped with correct envelope fields. 
+func TestRingBufferOverflowNoBlock(t *testing.T) { rb := NewRingBuffer(2) - // Publish 3 events with no reader — must not block done := make(chan struct{}) go func() { - rb.Publish(BrowserEvent{Seq: 1, Type: "cdp_event_1"}) - rb.Publish(BrowserEvent{Seq: 2, Type: "cdp_event_2"}) - rb.Publish(BrowserEvent{Seq: 3, Type: "cdp_event_3"}) + rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) close(done) }() select { case <-done: - // good — did not block - case <-time.After(500 * time.Millisecond): + case <-time.After(5 * time.Millisecond): t.Fatal("Publish blocked with no readers") } - // Create reader after overflow; should get events_dropped then available events reader := rb.NewReader() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() first, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events_dropped", first.Type) + assert.Equal(t, "events.dropped", first.Type) + assert.Equal(t, CategorySystem, first.Category) + assert.Equal(t, SourceKernelAPI, first.SourceKind) } -// TestEventsDropped: ring capacity 2; reader gets notify channel; publish 3 events; -// reader reads; assert first result is events_dropped BrowserEvent. -func TestEventsDropped(t *testing.T) { +// TestRingBufferOverflowExistingReader: reader created before overflow +// gets events.dropped with exact count, then continues reading. 
+func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) reader := rb.NewReader() - // Publish 3 events, overflowing the ring (capacity 2) - rb.Publish(BrowserEvent{Seq: 1, Type: "cdp_event_1"}) - rb.Publish(BrowserEvent{Seq: 2, Type: "cdp_event_2"}) - rb.Publish(BrowserEvent{Seq: 3, Type: "cdp_event_3"}) + rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() first, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events_dropped", first.Type) + assert.Equal(t, "events.dropped", first.Type) + assert.Equal(t, CategorySystem, first.Category) - // Data must be valid JSON with a "dropped" count require.NotNil(t, first.Data) assert.True(t, json.Valid(first.Data)) - assert.Contains(t, string(first.Data), `"dropped"`) + assert.JSONEq(t, `{"dropped":1}`, string(first.Data)) + + // After the drop sentinel the reader continues with the surviving events + // (seq 2 and 3, which fit in the capacity-2 buffer). + second, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, uint64(2), second.Seq) + + third, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, uint64(3), third.Seq) +} + +// TestConcurrentPublishRead: readers blocked on Read while a writer publishes +// concurrently — exercises locking and notify paths under go test -race. +func TestConcurrentPublishRead(t *testing.T) { + const numEvents = 20 + rb := NewRingBuffer(32) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + reader := rb.NewReader() + + var wg sync.WaitGroup + + // Reader goroutine: reads numEvents events. 
+ wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < numEvents; i++ { + _, err := reader.Read(ctx) + if !assert.NoError(t, err) { + return + } + } + }() + + // Writer goroutine: publishes numEvents events. + wg.Add(1) + go func() { + defer wg.Done() + for i := 1; i <= numEvents; i++ { + rb.Publish(BrowserEvent{ + Seq: uint64(i), + Type: "console.log", + Category: CategoryConsole, + SourceKind: SourceCDP, + }) + } + }() + + wg.Wait() } // TestConcurrentReaders: 3 readers subscribe before publish; publish 5 events; @@ -184,9 +241,8 @@ func TestConcurrentReaders(t *testing.T) { readers[i] = rb.NewReader() } - // Publish events after readers are created for i := 0; i < numEvents; i++ { - rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "cdp_event"}) + rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) } var wg sync.WaitGroup @@ -202,7 +258,9 @@ func TestConcurrentReaders(t *testing.T) { var evs []BrowserEvent for j := 0; j < numEvents; j++ { ev, err := reader.Read(ctx) - require.NoError(t, err) + if !assert.NoError(t, err) { + break + } evs = append(evs, ev) } results[idx] = evs @@ -211,7 +269,6 @@ func TestConcurrentReaders(t *testing.T) { wg.Wait() - // Each reader must have received all 5 events for i, evs := range results { assert.Len(t, evs, numEvents, "reader %d", i) for j, ev := range evs { @@ -222,52 +279,51 @@ func TestConcurrentReaders(t *testing.T) { // TestFileWriter: per-category JSONL appender tests. 
func TestFileWriter(t *testing.T) { - t.Run("writes_to_correct_file", func(t *testing.T) { + t.Run("category_routing", func(t *testing.T) { dir := t.TempDir() fw := NewFileWriter(dir) defer fw.Close() - ev := BrowserEvent{ - CaptureSessionID: "sess-1", - Seq: 1, - Ts: 1000, - Type: "console_log", - Data: json.RawMessage(`{"message":"hello"}`), + eventsToFile := []struct { + ev BrowserEvent + file string + category string + }{ + {BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, + {BrowserEvent{Type: "network.request", Category: CategoryNetwork, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, + {BrowserEvent{Type: "liveview.click", Category: CategoryLiveview, SourceKind: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, + {BrowserEvent{Type: "captcha.solve", Category: CategoryCaptcha, SourceKind: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, + {BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, + {BrowserEvent{Type: "input.click", Category: CategoryInteraction, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, + {BrowserEvent{Type: "monitor.connected", Category: CategorySystem, SourceKind: SourceKernelAPI, Seq: 1, Ts: 1}, "system.log", "system"}, } - require.NoError(t, fw.Write(ev)) - data, err := os.ReadFile(filepath.Join(dir, "console.log")) - require.NoError(t, err) + for _, e := range eventsToFile { + require.NoError(t, fw.Write(e.ev)) + } - lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") - require.Len(t, lines, 1) - assert.True(t, json.Valid([]byte(lines[0]))) - assert.Contains(t, lines[0], `"capture_session_id"`) - assert.Contains(t, lines[0], `"console_log"`) + for _, e := range eventsToFile { + data, err := os.ReadFile(filepath.Join(dir, e.file)) + require.NoError(t, err, "missing file %s for type %s", e.file, e.ev.Type) + + 
line := bytes.TrimRight(data, "\n") + require.True(t, json.Valid(line), "invalid JSON in %s", e.file) + + var decoded map[string]any + require.NoError(t, json.Unmarshal(line, &decoded)) + assert.Equal(t, e.category, decoded["category"], "wrong category in %s", e.file) + assert.Equal(t, string(e.ev.SourceKind), decoded["source_kind"], "wrong source_kind in %s", e.file) + } }) - t.Run("category_routing", func(t *testing.T) { + t.Run("empty_category_rejected", func(t *testing.T) { dir := t.TempDir() fw := NewFileWriter(dir) defer fw.Close() - typeToFile := map[string]string{ - "console_log": "console.log", - "network_request": "network.log", - "liveview_click": "liveview.log", - "captcha_solve": "captcha.log", - "cdp_navigation": "cdp.log", - } - - for typ := range typeToFile { - require.NoError(t, fw.Write(BrowserEvent{Type: typ, Seq: 1, Ts: 1})) - } - - for typ, file := range typeToFile { - data, err := os.ReadFile(filepath.Join(dir, file)) - require.NoError(t, err, "missing file for type %s", typ) - assert.True(t, json.Valid(bytes.TrimRight(data, "\n"))) - } + err := fw.Write(BrowserEvent{Type: "mystery", Category: "", SourceKind: SourceCDP, Seq: 1, Ts: 1}) + require.Error(t, err) + assert.Contains(t, err.Error(), "empty category") }) t.Run("concurrent_writes", func(t *testing.T) { @@ -285,9 +341,11 @@ func TestFileWriter(t *testing.T) { defer wg.Done() for j := 0; j < eventsPerGoroutine; j++ { ev := BrowserEvent{ - Seq: uint64(i*eventsPerGoroutine + j), - Type: "console_log", - Ts: 1, + Seq: uint64(i*eventsPerGoroutine + j), + Type: "console.log", + Category: CategoryConsole, + SourceKind: SourceCDP, + Ts: 1, } require.NoError(t, fw.Write(ev)) } @@ -310,12 +368,11 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - // No writes yet — directory should be empty. 
entries, err := os.ReadDir(dir) require.NoError(t, err) assert.Empty(t, entries, "files opened before first Write") - require.NoError(t, fw.Write(BrowserEvent{Type: "console_log", Seq: 1, Ts: 1})) + require.NoError(t, fw.Write(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Seq: 1, Ts: 1})) entries, err = os.ReadDir(dir) require.NoError(t, err) @@ -341,7 +398,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() for i := 0; i < 3; i++ { - p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) } ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -359,7 +416,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() before := time.Now().UnixMilli() - p.Publish(BrowserEvent{Type: "cdp_event"}) // Ts == 0 + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP}) // Ts == 0 after := time.Now().UnixMilli() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -374,7 +431,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_writes_file", func(t *testing.T) { p, dir := newPipeline(t) - p.Publish(BrowserEvent{Type: "console_log", Ts: 1}) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) data, err := os.ReadFile(filepath.Join(dir, "console.log")) require.NoError(t, err) @@ -382,22 +439,22 @@ func TestPipeline(t *testing.T) { lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") require.Len(t, lines, 1) assert.True(t, json.Valid([]byte(lines[0]))) - assert.Contains(t, lines[0], `"console_log"`) + assert.Contains(t, lines[0], `"console.log"`) }) t.Run("publish_writes_ring", func(t *testing.T) { p, _ := newPipeline(t) - // Subscribe reader BEFORE publish. 
reader := p.NewReader() - p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() ev, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "cdp_event", ev.Type) + assert.Equal(t, "page.navigation", ev.Type) + assert.Equal(t, CategoryPage, ev.Category) }) t.Run("start_sets_capture_session_id", func(t *testing.T) { @@ -405,7 +462,7 @@ func TestPipeline(t *testing.T) { p.Start("test-uuid") reader := p.NewReader() - p.Publish(BrowserEvent{Type: "cdp_event", Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -424,51 +481,48 @@ func TestPipeline(t *testing.T) { require.NoError(t, err) p.Publish(BrowserEvent{ - Type: "cdp_event", - Ts: 1, - Data: json.RawMessage(rawData), + Type: "page.navigation", + Category: CategoryPage, + SourceKind: SourceCDP, + Ts: 1, + Data: json.RawMessage(rawData), }) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - // Ring buffer event must have Truncated==true. ev, err := reader.Read(ctx) require.NoError(t, err) assert.True(t, ev.Truncated) + assert.True(t, json.Valid(ev.Data)) + + marshaled, err := json.Marshal(ev) + require.NoError(t, err) + assert.LessOrEqual(t, len(marshaled), maxS2RecordBytes) - // File must contain valid JSON with truncated==true. 
- data, err := os.ReadFile(filepath.Join(dir, "cdp.log")) + data, err := os.ReadFile(filepath.Join(dir, "page.log")) require.NoError(t, err) lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") require.Len(t, lines, 1) - assert.True(t, json.Valid([]byte(lines[0]))) assert.Contains(t, lines[0], `"truncated":true`) }) -} -// TestTruncation: construct event with Data = 1.1MB JSON bytes; call truncateIfNeeded; -// assert Truncated==true and json.Valid(result.Data)==true and len(marshal(result)) <= 1_000_000. -func TestTruncation(t *testing.T) { - // Build a Data field that is ~1.1MB - largeData := strings.Repeat("x", 1_100_000) - rawData, err := json.Marshal(map[string]string{"payload": largeData}) - require.NoError(t, err) + t.Run("defaults_detail_level", func(t *testing.T) { + p, _ := newPipeline(t) + reader := p.NewReader() - ev := BrowserEvent{ - CaptureSessionID: "test-session", - Seq: 1, - Ts: 1000, - Type: "cdp_event", - Data: json.RawMessage(rawData), - } + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) - result := truncateIfNeeded(ev) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() - assert.True(t, result.Truncated) - assert.True(t, json.Valid(result.Data)) + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, DetailDefault, ev.DetailLevel) - marshaled, err := json.Marshal(result) - require.NoError(t, err) - assert.LessOrEqual(t, len(marshaled), 1_000_000) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) + ev2, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, DetailVerbose, ev2.DetailLevel) + }) } diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index 11661150..ed2f3a58 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -13,13 +13,14 @@ type Pipeline struct { ring 
*RingBuffer files *FileWriter seq atomic.Uint64 - captureSessionID atomic.Value // stores string + captureSessionID atomic.Pointer[string] } // NewPipeline returns a Pipeline backed by the supplied ring and file writer. func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { p := &Pipeline{ring: ring, files: files} - p.captureSessionID.Store("") + empty := "" + p.captureSessionID.Store(&empty) return p } @@ -27,7 +28,7 @@ func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { // published event. It may be called at any time; the change is immediately // visible to concurrent Publish calls. func (p *Pipeline) Start(captureSessionID string) { - p.captureSessionID.Store(captureSessionID) + p.captureSessionID.Store(&captureSessionID) } // Publish stamps, truncates, files, and broadcasts a single event. @@ -41,17 +42,17 @@ func (p *Pipeline) Start(captureSessionID string) { // Errors from FileWriter.Write are silently dropped; the ring buffer always // receives the event even if the file write fails. func (p *Pipeline) Publish(ev BrowserEvent) { - ev.CaptureSessionID = p.captureSessionID.Load().(string) + ev.CaptureSessionID = *p.captureSessionID.Load() ev.Seq = p.seq.Add(1) // starts at 1 if ev.Ts == 0 { ev.Ts = time.Now().UnixMilli() } + if ev.DetailLevel == "" { + ev.DetailLevel = DetailDefault + } ev = truncateIfNeeded(ev) - // File write first — durable before in-memory. _ = p.files.Write(ev) - - // Ring buffer last — readers see the event after the file is written. 
p.ring.Publish(ev) } From 997edb4fdb8b6201b7c7b6cac4af5fbab5a0d756 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 27 Mar 2026 11:37:16 +0000 Subject: [PATCH 07/27] review: remove dead RingBuffer count field, fix FileWriter mutex doc, add concurrent publish+read race test --- server/lib/events/filewriter.go | 22 +++++++++------------- server/lib/events/ringbuffer.go | 14 +++++++------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/server/lib/events/filewriter.go b/server/lib/events/filewriter.go index 4b40d204..3d01b76c 100644 --- a/server/lib/events/filewriter.go +++ b/server/lib/events/filewriter.go @@ -1,7 +1,6 @@ package events import ( - "bytes" "encoding/json" "fmt" "os" @@ -10,8 +9,8 @@ import ( ) // FileWriter is a per-category JSONL appender. It opens each log file lazily on -// first write (O_APPEND|O_CREATE|O_WRONLY) and serialises concurrent writes -// within a category with a single mutex. +// first write (O_APPEND|O_CREATE|O_WRONLY) and serialises all concurrent writes +// with a single mutex. type FileWriter struct { mu sync.Mutex files map[EventCategory]*os.File @@ -25,16 +24,17 @@ func NewFileWriter(dir string) *FileWriter { } // Write serialises ev to JSON and appends it as a single JSONL line to the -// per-category log file. The mutex is held for the entire open+marshal+write -// sequence to prevent TOCTOU races and to guarantee whole-line atomicity for -// events larger than PIPE_BUF. +// per-category log file. The mutex guarantees whole-line atomicity across +// concurrent callers. func (fw *FileWriter) Write(ev BrowserEvent) error { - cat := CategoryFor(ev.Type) + cat := ev.Category + if cat == "" { + return fmt.Errorf("filewriter: event %q has empty category", ev.Type) + } fw.mu.Lock() defer fw.mu.Unlock() - // Lazy open. 
f, ok := fw.files[cat] if !ok { path := filepath.Join(fw.dir, string(cat)+".log") @@ -51,11 +51,7 @@ func (fw *FileWriter) Write(ev BrowserEvent) error { return fmt.Errorf("filewriter: marshal: %w", err) } - var buf bytes.Buffer - buf.Write(data) - buf.WriteByte('\n') - - if _, err := f.Write(buf.Bytes()); err != nil { + if _, err := f.Write(append(data, '\n')); err != nil { return fmt.Errorf("filewriter: write: %w", err) } diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 3911912e..384025c8 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -15,7 +15,6 @@ type RingBuffer struct { mu sync.RWMutex buf []BrowserEvent head int // next write position (mod cap) - count int // items currently stored (0..cap) written uint64 // total ever published (monotonic) notify chan struct{} } @@ -35,9 +34,6 @@ func (rb *RingBuffer) Publish(ev BrowserEvent) { rb.mu.Lock() rb.buf[rb.head] = ev rb.head = (rb.head + 1) % len(rb.buf) - if rb.count < len(rb.buf) { - rb.count++ - } rb.written++ old := rb.notify rb.notify = make(chan struct{}) @@ -54,7 +50,7 @@ func (rb *RingBuffer) oldestSeq() uint64 { return rb.written - uint64(len(rb.buf)) } -// NewReader returns a Reader positioned at seq 0. +// NewReader returns a Reader positioned at publish index 0 (the very beginning of the ring). // If the ring has already published events, the reader will receive an // events_dropped BrowserEvent on the first Read call if it has fallen behind // the oldest retained event. @@ -63,9 +59,13 @@ func (rb *RingBuffer) NewReader() *Reader { } // Reader tracks an independent read position in a RingBuffer. +// A Reader must not be used concurrently from multiple goroutines. +// +// nextSeq is a monotonic count of publishes consumed by this reader — it is +// an index into the ring, not the BrowserEvent.Seq field. 
type Reader struct { rb *RingBuffer - nextSeq uint64 + nextSeq uint64 // publish index, not BrowserEvent.Seq } // Read blocks until the next event is available or ctx is cancelled. @@ -85,7 +85,7 @@ func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { r.nextSeq = oldest r.rb.mu.RUnlock() data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return BrowserEvent{Type: "events_dropped", Data: data}, nil + return BrowserEvent{Type: "events.dropped", Category: CategorySystem, SourceKind: SourceKernelAPI, Data: data}, nil } // Event is available — read it. From e5153da1bc2e3d5c8dfcea4dfd84fa8c61b33cc3 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 27 Mar 2026 12:30:00 +0000 Subject: [PATCH 08/27] chore: clean up maxS2RecordBytes comment --- server/lib/events/event.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index b174147e..2b1569fc 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -4,9 +4,7 @@ import ( "encoding/json" ) -// maxS2RecordBytes is the S2 event pipeline maximum record size (1 MB). -// Events exceeding this limit have their Data field replaced with null and -// Truncated set to true before being written to the file and ring sinks. +// maxS2RecordBytes is the maximum record size for the S2 event pipeline (1 MB). const maxS2RecordBytes = 1_000_000 // EventCategory is a first-class envelope field that determines log file routing. 
From 1644fe726cb9f9de3a69e053f226dad4bbe76364 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Fri, 27 Mar 2026 12:30:03 +0000 Subject: [PATCH 09/27] fix: serialise Pipeline.Publish to guarantee monotonic seq delivery order --- server/lib/events/events_test.go | 36 +++++++++++++++++++++++++++++++- server/lib/events/pipeline.go | 9 +++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index deb390c0..dc6f05ec 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -15,7 +15,7 @@ import ( "github.com/stretchr/testify/require" ) -// TestBrowserEventSerialization: round-trip marshal/unmarshal verifying all SCHEMA-01 +// TestBrowserEventSerialization: round-trip marshal/unmarshal verifying all // envelope fields serialize with correct JSON keys and values, including provenance. func TestBrowserEventSerialization(t *testing.T) { ev := BrowserEvent{ @@ -393,6 +393,40 @@ func TestPipeline(t *testing.T) { return p, dir } + t.Run("concurrent_publish_seq_order", func(t *testing.T) { + const goroutines = 8 + const eventsEach = 50 + const total = goroutines * eventsEach + + // Ring must hold all events so no drop sentinels are emitted. 
+ rb := NewRingBuffer(total) + fw := NewFileWriter(t.TempDir()) + p := NewPipeline(rb, fw) + t.Cleanup(func() { p.Close() }) + reader := p.NewReader() + + var wg sync.WaitGroup + for i := 0; i < goroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < eventsEach; j++ { + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) + } + }() + } + wg.Wait() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + for want := uint64(1); want <= total; want++ { + ev, err := reader.Read(ctx) + require.NoError(t, err) + assert.Equal(t, want, ev.Seq, "events must arrive in seq order") + } + }) + t.Run("publish_increments_seq", func(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index ed2f3a58..b7184abc 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -1,6 +1,7 @@ package events import ( + "sync" "sync/atomic" "time" ) @@ -10,6 +11,7 @@ import ( // applies truncation, durably appends it to the per-category log file, and // then makes it available to ring buffer readers. type Pipeline struct { + mu sync.Mutex ring *RingBuffer files *FileWriter seq atomic.Uint64 @@ -35,13 +37,18 @@ func (p *Pipeline) Start(captureSessionID string) { // // Ordering: // 1. Stamp CaptureSessionID, Seq, Ts (Ts only if caller left it zero) -// 2. Apply truncateIfNeeded (SCHEMA-04) — must happen before both sinks +// 2. Apply truncateIfNeeded — must happen before both sinks // 3. Write to FileWriter (durable before in-memory) // 4. Publish to RingBuffer (in-memory fan-out) // +// The mutex serialises concurrent callers so that seq assignment and sink +// delivery are atomic — readers always see events in seq order. // Errors from FileWriter.Write are silently dropped; the ring buffer always // receives the event even if the file write fails. 
func (p *Pipeline) Publish(ev BrowserEvent) { + p.mu.Lock() + defer p.mu.Unlock() + ev.CaptureSessionID = *p.captureSessionID.Load() ev.Seq = p.seq.Add(1) // starts at 1 if ev.Ts == 0 { From 36cff2dd9f0df9ab3c54261ad0966e48726629d1 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 30 Mar 2026 15:01:13 +0000 Subject: [PATCH 10/27] review --- server/lib/events/event.go | 48 ++++++-------- server/lib/events/events_test.go | 108 ++++++++++++++----------------- server/lib/events/filewriter.go | 20 ++---- server/lib/events/pipeline.go | 27 +++----- server/lib/events/ringbuffer.go | 24 ++----- 5 files changed, 89 insertions(+), 138 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 2b1569fc..53dfba67 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -7,7 +7,7 @@ import ( // maxS2RecordBytes is the maximum record size for the S2 event pipeline (1 MB). const maxS2RecordBytes = 1_000_000 -// EventCategory is a first-class envelope field that determines log file routing. +// EventCategory determines type of logging type EventCategory string const ( @@ -20,17 +20,15 @@ const ( CategorySystem EventCategory = "system" ) -// SourceKind identifies the provenance of an event — which subsystem produced it. -type SourceKind string +type Source string const ( - SourceCDP SourceKind = "cdp" - SourceKernelAPI SourceKind = "kernel_api" - SourceExtension SourceKind = "extension" - SourceLocalProcess SourceKind = "local_process" + SourceCDP Source = "cdp" + SourceKernelAPI Source = "kernel_api" + SourceExtension Source = "extension" + SourceLocalProcess Source = "local_process" ) -// DetailLevel controls the verbosity of the event payload. type DetailLevel string const ( @@ -41,23 +39,13 @@ const ( ) // BrowserEvent is the canonical event structure for the browser capture pipeline. 
-// -// The envelope is designed so that capture config and subscription selectors -// can operate on stable, first-class fields (Category, SourceKind, DetailLevel) -// without parsing the Type string. Type carries semantic identity (e.g. -// "console.log", "network.request"); SourceEvent carries the raw upstream -// event name (e.g. "Runtime.consoleAPICalled") for diagnostics. -// -// DetailLevel is always serialised (no omitempty). Pipeline.Publish defaults it -// to DetailDefault; callers constructing events outside a Pipeline should set it -// explicitly. type BrowserEvent struct { CaptureSessionID string `json:"capture_session_id"` Seq uint64 `json:"seq"` Ts int64 `json:"ts"` Type string `json:"type"` Category EventCategory `json:"category"` - SourceKind SourceKind `json:"source_kind"` + Source Source `json:"source"` SourceEvent string `json:"source_event,omitempty"` DetailLevel DetailLevel `json:"detail_level"` TargetID string `json:"target_id,omitempty"` @@ -69,20 +57,20 @@ type BrowserEvent struct { Truncated bool `json:"truncated,omitempty"` } -// truncateIfNeeded returns a copy of ev with Data replaced with json.RawMessage("null") -// and Truncated set to true if the marshaled size exceeds maxS2RecordBytes. -// Never attempt byte-slice truncation of the Data field — partial JSON is invalid. -func truncateIfNeeded(ev BrowserEvent) BrowserEvent { - candidate, err := json.Marshal(ev) +// truncateIfNeeded marshals ev and returns the (possibly truncated) event together +func truncateIfNeeded(ev BrowserEvent) (BrowserEvent, []byte) { + data, err := json.Marshal(ev) if err != nil { - // Marshal should never fail for BrowserEvent (all fields are JSON-safe), - // but if it does return ev unchanged rather than silently nulling Data. 
- return ev + return ev, data } - if len(candidate) <= maxS2RecordBytes { - return ev + if len(data) <= maxS2RecordBytes { + return ev, data } ev.Data = json.RawMessage("null") ev.Truncated = true - return ev + data, err = json.Marshal(ev) + if err != nil { + return ev, nil + } + return ev, data } diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index dc6f05ec..09c82e0c 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -15,8 +15,6 @@ import ( "github.com/stretchr/testify/require" ) -// TestBrowserEventSerialization: round-trip marshal/unmarshal verifying all -// envelope fields serialize with correct JSON keys and values, including provenance. func TestBrowserEventSerialization(t *testing.T) { ev := BrowserEvent{ CaptureSessionID: "test-session-id", @@ -24,7 +22,7 @@ func TestBrowserEventSerialization(t *testing.T) { Ts: 1234567890000, Type: "console.log", Category: CategoryConsole, - SourceKind: SourceCDP, + Source: SourceCDP, SourceEvent: "Runtime.consoleAPICalled", DetailLevel: DetailDefault, TargetID: "target-1", @@ -43,7 +41,7 @@ func TestBrowserEventSerialization(t *testing.T) { assert.Equal(t, "console.log", decoded["type"]) assert.Equal(t, "console", decoded["category"]) - assert.Equal(t, "cdp", decoded["source_kind"]) + assert.Equal(t, "cdp", decoded["source"]) assert.Equal(t, "Runtime.consoleAPICalled", decoded["source_event"]) assert.Equal(t, "default", decoded["detail_level"]) assert.Equal(t, "test-session-id", decoded["capture_session_id"]) @@ -55,8 +53,6 @@ func TestBrowserEventSerialization(t *testing.T) { assert.Equal(t, "https://example.com", decoded["url"]) } -// TestBrowserEventData: embed a pre-serialized JSON object in Data field; marshal outer event; -// assert Data appears verbatim (no double-encoding). 
func TestBrowserEventData(t *testing.T) { rawData := json.RawMessage(`{"key":"value","num":42}`) ev := BrowserEvent{ @@ -65,7 +61,7 @@ func TestBrowserEventData(t *testing.T) { Ts: 1000, Type: "page.navigation", Category: CategoryPage, - SourceKind: SourceCDP, + Source: SourceCDP, Data: rawData, } @@ -74,10 +70,9 @@ func TestBrowserEventData(t *testing.T) { s := string(b) assert.Contains(t, s, `"data":{"key":"value","num":42}`) - assert.NotContains(t, s, `"data":"{`) // would indicate double-encoding + assert.NotContains(t, s, `"data":"{`) } -// TestBrowserEventOmitEmpty: source_event is omitted when empty; detail_level always present. func TestBrowserEventOmitEmpty(t *testing.T) { ev := BrowserEvent{ CaptureSessionID: "sess", @@ -85,7 +80,7 @@ func TestBrowserEventOmitEmpty(t *testing.T) { Ts: 1000, Type: "console.log", Category: CategoryConsole, - SourceKind: SourceCDP, + Source: SourceCDP, } b, err := json.Marshal(ev) @@ -93,19 +88,18 @@ func TestBrowserEventOmitEmpty(t *testing.T) { s := string(b) assert.NotContains(t, s, `"source_event"`) - // detail_level is always serialized (not omitempty) — zero value is "" assert.Contains(t, s, `"detail_level"`) } -// TestRingBuffer: publish 3 events; reader reads all 3 in order. 
+// TestRingBuffer: publish 3 events; reader reads all 3 in order func TestRingBuffer(t *testing.T) { rb := NewRingBuffer(10) reader := rb.NewReader() events := []BrowserEvent{ - {Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}, - {Seq: 2, Type: "network.request", Category: CategoryNetwork, SourceKind: SourceCDP}, - {Seq: 3, Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP}, + {Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}, + {Seq: 2, Type: "network.request", Category: CategoryNetwork, Source: SourceCDP}, + {Seq: 3, Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}, } for _, ev := range events { @@ -123,16 +117,15 @@ func TestRingBuffer(t *testing.T) { } } -// TestRingBufferOverflowNoBlock: writer never blocks even with no readers; -// late-joining reader gets events.dropped with correct envelope fields. +// TestRingBufferOverflowNoBlock: writer never blocks even with no readers func TestRingBufferOverflowNoBlock(t *testing.T) { rb := NewRingBuffer(2) done := make(chan struct{}) go func() { - rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) close(done) }() @@ -150,18 +143,16 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { require.NoError(t, err) assert.Equal(t, "events.dropped", first.Type) assert.Equal(t, CategorySystem, first.Category) - assert.Equal(t, SourceKernelAPI, first.SourceKind) + 
assert.Equal(t, SourceKernelAPI, first.Source) } -// TestRingBufferOverflowExistingReader: reader created before overflow -// gets events.dropped with exact count, then continues reading. func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) reader := rb.NewReader() - rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -176,7 +167,6 @@ func TestRingBufferOverflowExistingReader(t *testing.T) { assert.JSONEq(t, `{"dropped":1}`, string(first.Data)) // After the drop sentinel the reader continues with the surviving events - // (seq 2 and 3, which fit in the capacity-2 buffer). second, err := reader.Read(ctx) require.NoError(t, err) assert.Equal(t, uint64(2), second.Seq) @@ -186,8 +176,6 @@ func TestRingBufferOverflowExistingReader(t *testing.T) { assert.Equal(t, uint64(3), third.Seq) } -// TestConcurrentPublishRead: readers blocked on Read while a writer publishes -// concurrently — exercises locking and notify paths under go test -race. func TestConcurrentPublishRead(t *testing.T) { const numEvents = 20 rb := NewRingBuffer(32) @@ -199,7 +187,6 @@ func TestConcurrentPublishRead(t *testing.T) { var wg sync.WaitGroup - // Reader goroutine: reads numEvents events. 
wg.Add(1) go func() { defer wg.Done() @@ -211,7 +198,6 @@ func TestConcurrentPublishRead(t *testing.T) { } }() - // Writer goroutine: publishes numEvents events. wg.Add(1) go func() { defer wg.Done() @@ -220,7 +206,7 @@ func TestConcurrentPublishRead(t *testing.T) { Seq: uint64(i), Type: "console.log", Category: CategoryConsole, - SourceKind: SourceCDP, + Source: SourceCDP, }) } }() @@ -228,8 +214,6 @@ func TestConcurrentPublishRead(t *testing.T) { wg.Wait() } -// TestConcurrentReaders: 3 readers subscribe before publish; publish 5 events; -// each reader independently reads all 5; no reader affects another. func TestConcurrentReaders(t *testing.T) { rb := NewRingBuffer(20) @@ -242,7 +226,7 @@ func TestConcurrentReaders(t *testing.T) { } for i := 0; i < numEvents; i++ { - rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP}) + rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) } var wg sync.WaitGroup @@ -289,17 +273,19 @@ func TestFileWriter(t *testing.T) { file string category string }{ - {BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, - {BrowserEvent{Type: "network.request", Category: CategoryNetwork, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, - {BrowserEvent{Type: "liveview.click", Category: CategoryLiveview, SourceKind: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, - {BrowserEvent{Type: "captcha.solve", Category: CategoryCaptcha, SourceKind: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, - {BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, - {BrowserEvent{Type: "input.click", Category: CategoryInteraction, SourceKind: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, - {BrowserEvent{Type: "monitor.connected", Category: 
CategorySystem, SourceKind: SourceKernelAPI, Seq: 1, Ts: 1}, "system.log", "system"}, + {BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, + {BrowserEvent{Type: "network.request", Category: CategoryNetwork, Source: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, + {BrowserEvent{Type: "liveview.click", Category: CategoryLiveview, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, + {BrowserEvent{Type: "captcha.solve", Category: CategoryCaptcha, Source: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, + {BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, + {BrowserEvent{Type: "input.click", Category: CategoryInteraction, Source: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, + {BrowserEvent{Type: "monitor.connected", Category: CategorySystem, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "system.log", "system"}, } for _, e := range eventsToFile { - require.NoError(t, fw.Write(e.ev)) + data, err := json.Marshal(e.ev) + require.NoError(t, err) + require.NoError(t, fw.Write(e.ev, data)) } for _, e := range eventsToFile { @@ -312,7 +298,7 @@ func TestFileWriter(t *testing.T) { var decoded map[string]any require.NoError(t, json.Unmarshal(line, &decoded)) assert.Equal(t, e.category, decoded["category"], "wrong category in %s", e.file) - assert.Equal(t, string(e.ev.SourceKind), decoded["source_kind"], "wrong source_kind in %s", e.file) + assert.Equal(t, string(e.ev.Source), decoded["source"], "wrong source in %s", e.file) } }) @@ -321,7 +307,9 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - err := fw.Write(BrowserEvent{Type: "mystery", Category: "", SourceKind: SourceCDP, Seq: 1, Ts: 1}) + ev := BrowserEvent{Type: "mystery", Category: "", Source: SourceCDP, Seq: 1, Ts: 1} + data, _ := json.Marshal(ev) + err := fw.Write(ev, data) require.Error(t, err) 
assert.Contains(t, err.Error(), "empty category") }) @@ -344,10 +332,12 @@ func TestFileWriter(t *testing.T) { Seq: uint64(i*eventsPerGoroutine + j), Type: "console.log", Category: CategoryConsole, - SourceKind: SourceCDP, + Source: SourceCDP, Ts: 1, } - require.NoError(t, fw.Write(ev)) + evData, err := json.Marshal(ev) + require.NoError(t, err) + require.NoError(t, fw.Write(ev, evData)) } }(i) } @@ -372,7 +362,10 @@ func TestFileWriter(t *testing.T) { require.NoError(t, err) assert.Empty(t, entries, "files opened before first Write") - require.NoError(t, fw.Write(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Seq: 1, Ts: 1})) + lazyEv := BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1} + lazyData, err := json.Marshal(lazyEv) + require.NoError(t, err) + require.NoError(t, fw.Write(lazyEv, lazyData)) entries, err = os.ReadDir(dir) require.NoError(t, err) @@ -381,7 +374,6 @@ func TestFileWriter(t *testing.T) { }) } -// TestPipeline: Pipeline glue type tests. 
func TestPipeline(t *testing.T) { newPipeline := func(t *testing.T) (*Pipeline, string) { t.Helper() @@ -411,7 +403,7 @@ func TestPipeline(t *testing.T) { go func() { defer wg.Done() for j := 0; j < eventsEach; j++ { - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) } }() } @@ -432,7 +424,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() for i := 0; i < 3; i++ { - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) } ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -450,7 +442,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() before := time.Now().UnixMilli() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP}) // Ts == 0 + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}) // Ts == 0 after := time.Now().UnixMilli() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -465,7 +457,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_writes_file", func(t *testing.T) { p, dir := newPipeline(t) - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) data, err := os.ReadFile(filepath.Join(dir, "console.log")) require.NoError(t, err) @@ -480,7 +472,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) ctx, cancel := 
context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -496,7 +488,7 @@ func TestPipeline(t *testing.T) { p.Start("test-uuid") reader := p.NewReader() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -517,7 +509,7 @@ func TestPipeline(t *testing.T) { p.Publish(BrowserEvent{ Type: "page.navigation", Category: CategoryPage, - SourceKind: SourceCDP, + Source: SourceCDP, Ts: 1, Data: json.RawMessage(rawData), }) @@ -545,7 +537,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1}) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -554,7 +546,7 @@ func TestPipeline(t *testing.T) { require.NoError(t, err) assert.Equal(t, DetailDefault, ev.DetailLevel) - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, SourceKind: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) + p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) ev2, err := reader.Read(ctx) require.NoError(t, err) assert.Equal(t, DetailVerbose, ev2.DetailLevel) diff --git a/server/lib/events/filewriter.go b/server/lib/events/filewriter.go index 3d01b76c..87bad172 100644 --- a/server/lib/events/filewriter.go +++ b/server/lib/events/filewriter.go @@ -1,7 +1,6 @@ package events import ( - "encoding/json" "fmt" "os" "path/filepath" @@ -10,23 +9,20 @@ import ( // FileWriter is a per-category JSONL appender. 
It opens each log file lazily on // first write (O_APPEND|O_CREATE|O_WRONLY) and serialises all concurrent writes -// with a single mutex. +// with a single mutex type FileWriter struct { mu sync.Mutex files map[EventCategory]*os.File dir string } -// NewFileWriter returns a FileWriter that writes to dir. -// No files are opened until the first Write call. +// NewFileWriter returns a FileWriter that writes to dir func NewFileWriter(dir string) *FileWriter { return &FileWriter{dir: dir, files: make(map[EventCategory]*os.File)} } -// Write serialises ev to JSON and appends it as a single JSONL line to the -// per-category log file. The mutex guarantees whole-line atomicity across -// concurrent callers. -func (fw *FileWriter) Write(ev BrowserEvent) error { +// Write appends data as a single JSONL line to the per-category log file for ev +func (fw *FileWriter) Write(ev BrowserEvent, data []byte) error { cat := ev.Category if cat == "" { return fmt.Errorf("filewriter: event %q has empty category", ev.Type) @@ -46,11 +42,6 @@ func (fw *FileWriter) Write(ev BrowserEvent) error { fw.files[cat] = f } - data, err := json.Marshal(ev) - if err != nil { - return fmt.Errorf("filewriter: marshal: %w", err) - } - if _, err := f.Write(append(data, '\n')); err != nil { return fmt.Errorf("filewriter: write: %w", err) } @@ -58,8 +49,7 @@ func (fw *FileWriter) Write(ev BrowserEvent) error { return nil } -// Close closes all open log file descriptors. The first encountered error is -// returned; subsequent files are still closed. +// Close closes all open log file descriptors func (fw *FileWriter) Close() error { fw.mu.Lock() defer fw.mu.Unlock() diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index b7184abc..c6f93dcb 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -1,15 +1,13 @@ package events import ( + "log/slog" "sync" "sync/atomic" "time" ) -// Pipeline glues a RingBuffer and a FileWriter into a single write path. 
-// A single call to Publish stamps the event with a monotonic sequence number, -// applies truncation, durably appends it to the per-category log file, and -// then makes it available to ring buffer readers. +// Pipeline glues a RingBuffer and a FileWriter into a single write path type Pipeline struct { mu sync.Mutex ring *RingBuffer @@ -18,7 +16,6 @@ type Pipeline struct { captureSessionID atomic.Pointer[string] } -// NewPipeline returns a Pipeline backed by the supplied ring and file writer. func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { p := &Pipeline{ring: ring, files: files} empty := "" @@ -27,8 +24,7 @@ func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { } // Start sets the capture session ID that will be stamped on every subsequent -// published event. It may be called at any time; the change is immediately -// visible to concurrent Publish calls. +// published event func (p *Pipeline) Start(captureSessionID string) { p.captureSessionID.Store(&captureSessionID) } @@ -40,36 +36,33 @@ func (p *Pipeline) Start(captureSessionID string) { // 2. Apply truncateIfNeeded — must happen before both sinks // 3. Write to FileWriter (durable before in-memory) // 4. Publish to RingBuffer (in-memory fan-out) -// -// The mutex serialises concurrent callers so that seq assignment and sink -// delivery are atomic — readers always see events in seq order. -// Errors from FileWriter.Write are silently dropped; the ring buffer always -// receives the event even if the file write fails. 
func (p *Pipeline) Publish(ev BrowserEvent) { p.mu.Lock() defer p.mu.Unlock() ev.CaptureSessionID = *p.captureSessionID.Load() - ev.Seq = p.seq.Add(1) // starts at 1 + ev.Seq = p.seq.Add(1) if ev.Ts == 0 { ev.Ts = time.Now().UnixMilli() } if ev.DetailLevel == "" { ev.DetailLevel = DetailDefault } - ev = truncateIfNeeded(ev) + ev, data := truncateIfNeeded(ev) - _ = p.files.Write(ev) + if err := p.files.Write(ev, data); err != nil { + slog.Error("pipeline: file write failed", "seq", ev.Seq, "category", ev.Category, "err", err) + } p.ring.Publish(ev) } -// NewReader returns a Reader positioned at the start of the ring buffer. +// NewReader returns a Reader positioned at the start of the ring buffer func (p *Pipeline) NewReader() *Reader { return p.ring.NewReader() } // Close closes the underlying FileWriter, flushing and releasing all open -// file descriptors. +// file descriptors func (p *Pipeline) Close() error { return p.files.Close() } diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 384025c8..8d0b48ab 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -19,7 +19,6 @@ type RingBuffer struct { notify chan struct{} } -// NewRingBuffer creates a new RingBuffer with the given capacity. func NewRingBuffer(capacity int) *RingBuffer { return &RingBuffer{ buf: make([]BrowserEvent, capacity), @@ -29,7 +28,7 @@ func NewRingBuffer(capacity int) *RingBuffer { // Publish adds an event to the ring buffer, evicting the oldest entry on overflow. // Closes the current notify channel (waking all waiting readers) and replaces it -// with a new one — outside the lock to avoid blocking under contention. 
+// with a new one, outside the lock to avoid blocking under contention func (rb *RingBuffer) Publish(ev BrowserEvent) { rb.mu.Lock() rb.buf[rb.head] = ev @@ -41,8 +40,7 @@ func (rb *RingBuffer) Publish(ev BrowserEvent) { close(old) // outside lock to avoid blocking under contention } -// oldestSeq returns the seq of the oldest event still in the ring. -// Must be called under at least a read lock. +// oldestSeq returns the seq of the oldest event still in the ring func (rb *RingBuffer) oldestSeq() uint64 { if rb.written <= uint64(len(rb.buf)) { return 0 @@ -50,28 +48,21 @@ func (rb *RingBuffer) oldestSeq() uint64 { return rb.written - uint64(len(rb.buf)) } -// NewReader returns a Reader positioned at publish index 0 (the very beginning of the ring). +// NewReader returns a Reader positioned at publish index 0 // If the ring has already published events, the reader will receive an // events_dropped BrowserEvent on the first Read call if it has fallen behind -// the oldest retained event. +// the oldest retained event func (rb *RingBuffer) NewReader() *Reader { return &Reader{rb: rb, nextSeq: 0} } // Reader tracks an independent read position in a RingBuffer. -// A Reader must not be used concurrently from multiple goroutines. -// -// nextSeq is a monotonic count of publishes consumed by this reader — it is -// an index into the ring, not the BrowserEvent.Seq field. type Reader struct { rb *RingBuffer nextSeq uint64 // publish index, not BrowserEvent.Seq } -// Read blocks until the next event is available or ctx is cancelled. -// Returns (event, nil) for a normal event. -// Returns (events_dropped BrowserEvent, nil) if the reader has fallen behind -// the ring's oldest retained event — the dropped count is in Data as valid JSON. 
+// Read blocks until the next event is available or ctx is cancelled func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { for { r.rb.mu.RLock() @@ -79,16 +70,14 @@ func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { oldest := r.rb.oldestSeq() written := r.rb.written - // Reader fell behind — synthesize events_dropped before advancing. if r.nextSeq < oldest { dropped := oldest - r.nextSeq r.nextSeq = oldest r.rb.mu.RUnlock() data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return BrowserEvent{Type: "events.dropped", Category: CategorySystem, SourceKind: SourceKernelAPI, Data: data}, nil + return BrowserEvent{Type: "events.dropped", Category: CategorySystem, Source: SourceKernelAPI, Data: data}, nil } - // Event is available — read it. if r.nextSeq < written { idx := int(r.nextSeq % uint64(len(r.rb.buf))) ev := r.rb.buf[idx] @@ -97,7 +86,6 @@ func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { return ev, nil } - // No event yet — wait for notification. r.rb.mu.RUnlock() select { From 339d7d396fb90fafd94dbd739e9dc2d038c2f14b Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:04:59 +0000 Subject: [PATCH 11/27] refactor: rename BrowserEvent to Event, DetailDefault to DetailStandard Event is the agreed portable name. DetailStandard avoids Go keyword ambiguity with "default". 
--- server/lib/events/event.go | 8 ++-- server/lib/events/events_test.go | 80 ++++++++++++++++---------------- server/lib/events/filewriter.go | 2 +- server/lib/events/pipeline.go | 4 +- server/lib/events/ringbuffer.go | 18 +++---- 5 files changed, 56 insertions(+), 56 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 53dfba67..3d88369c 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -33,13 +33,13 @@ type DetailLevel string const ( DetailMinimal DetailLevel = "minimal" - DetailDefault DetailLevel = "default" + DetailStandard DetailLevel = "standard" DetailVerbose DetailLevel = "verbose" DetailRaw DetailLevel = "raw" ) -// BrowserEvent is the canonical event structure for the browser capture pipeline. -type BrowserEvent struct { +// Event is the canonical event structure for the capture pipeline. +type Event struct { CaptureSessionID string `json:"capture_session_id"` Seq uint64 `json:"seq"` Ts int64 `json:"ts"` @@ -58,7 +58,7 @@ type BrowserEvent struct { } // truncateIfNeeded marshals ev and returns the (possibly truncated) event together -func truncateIfNeeded(ev BrowserEvent) (BrowserEvent, []byte) { +func truncateIfNeeded(ev Event) (Event, []byte) { data, err := json.Marshal(ev) if err != nil { return ev, data diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index 09c82e0c..c60252a3 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -15,8 +15,8 @@ import ( "github.com/stretchr/testify/require" ) -func TestBrowserEventSerialization(t *testing.T) { - ev := BrowserEvent{ +func TestEventSerialization(t *testing.T) { + ev := Event{ CaptureSessionID: "test-session-id", Seq: 1, Ts: 1234567890000, @@ -24,7 +24,7 @@ func TestBrowserEventSerialization(t *testing.T) { Category: CategoryConsole, Source: SourceCDP, SourceEvent: "Runtime.consoleAPICalled", - DetailLevel: DetailDefault, + DetailLevel: DetailStandard, TargetID: "target-1", 
CDPSessionID: "cdp-session-1", FrameID: "frame-1", @@ -43,7 +43,7 @@ func TestBrowserEventSerialization(t *testing.T) { assert.Equal(t, "console", decoded["category"]) assert.Equal(t, "cdp", decoded["source"]) assert.Equal(t, "Runtime.consoleAPICalled", decoded["source_event"]) - assert.Equal(t, "default", decoded["detail_level"]) + assert.Equal(t, "standard", decoded["detail_level"]) assert.Equal(t, "test-session-id", decoded["capture_session_id"]) assert.Equal(t, float64(1), decoded["seq"]) assert.Equal(t, "target-1", decoded["target_id"]) @@ -53,9 +53,9 @@ func TestBrowserEventSerialization(t *testing.T) { assert.Equal(t, "https://example.com", decoded["url"]) } -func TestBrowserEventData(t *testing.T) { +func TestEventData(t *testing.T) { rawData := json.RawMessage(`{"key":"value","num":42}`) - ev := BrowserEvent{ + ev := Event{ CaptureSessionID: "test-session", Seq: 1, Ts: 1000, @@ -73,8 +73,8 @@ func TestBrowserEventData(t *testing.T) { assert.NotContains(t, s, `"data":"{`) } -func TestBrowserEventOmitEmpty(t *testing.T) { - ev := BrowserEvent{ +func TestEventOmitEmpty(t *testing.T) { + ev := Event{ CaptureSessionID: "sess", Seq: 1, Ts: 1000, @@ -96,7 +96,7 @@ func TestRingBuffer(t *testing.T) { rb := NewRingBuffer(10) reader := rb.NewReader() - events := []BrowserEvent{ + events := []Event{ {Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}, {Seq: 2, Type: "network.request", Category: CategoryNetwork, Source: SourceCDP}, {Seq: 3, Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}, @@ -123,9 +123,9 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { done := make(chan struct{}) go func() { - rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 1, 
Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) close(done) }() @@ -150,9 +150,9 @@ func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) reader := rb.NewReader() - rb.Publish(BrowserEvent{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(BrowserEvent{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -202,7 +202,7 @@ func TestConcurrentPublishRead(t *testing.T) { go func() { defer wg.Done() for i := 1; i <= numEvents; i++ { - rb.Publish(BrowserEvent{ + rb.Publish(Event{ Seq: uint64(i), Type: "console.log", Category: CategoryConsole, @@ -226,11 +226,11 @@ func TestConcurrentReaders(t *testing.T) { } for i := 0; i < numEvents; i++ { - rb.Publish(BrowserEvent{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) } var wg sync.WaitGroup - results := make([][]BrowserEvent, numReaders) + results := make([][]Event, numReaders) for i, r := range readers { wg.Add(1) @@ -239,7 +239,7 @@ func TestConcurrentReaders(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - var evs []BrowserEvent + var evs []Event for j := 0; j < 
numEvents; j++ { ev, err := reader.Read(ctx) if !assert.NoError(t, err) { @@ -269,17 +269,17 @@ func TestFileWriter(t *testing.T) { defer fw.Close() eventsToFile := []struct { - ev BrowserEvent + ev Event file string category string }{ - {BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, - {BrowserEvent{Type: "network.request", Category: CategoryNetwork, Source: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, - {BrowserEvent{Type: "liveview.click", Category: CategoryLiveview, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, - {BrowserEvent{Type: "captcha.solve", Category: CategoryCaptcha, Source: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, - {BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, - {BrowserEvent{Type: "input.click", Category: CategoryInteraction, Source: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, - {BrowserEvent{Type: "monitor.connected", Category: CategorySystem, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "system.log", "system"}, + {Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, + {Event{Type: "network.request", Category: CategoryNetwork, Source: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, + {Event{Type: "liveview.click", Category: CategoryLiveview, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, + {Event{Type: "captcha.solve", Category: CategoryCaptcha, Source: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, + {Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, + {Event{Type: "input.click", Category: CategoryInteraction, Source: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, + {Event{Type: "monitor.connected", Category: CategorySystem, Source: SourceKernelAPI, Seq: 1, 
Ts: 1}, "system.log", "system"}, } for _, e := range eventsToFile { @@ -307,7 +307,7 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - ev := BrowserEvent{Type: "mystery", Category: "", Source: SourceCDP, Seq: 1, Ts: 1} + ev := Event{Type: "mystery", Category: "", Source: SourceCDP, Seq: 1, Ts: 1} data, _ := json.Marshal(ev) err := fw.Write(ev, data) require.Error(t, err) @@ -328,7 +328,7 @@ func TestFileWriter(t *testing.T) { go func(i int) { defer wg.Done() for j := 0; j < eventsPerGoroutine; j++ { - ev := BrowserEvent{ + ev := Event{ Seq: uint64(i*eventsPerGoroutine + j), Type: "console.log", Category: CategoryConsole, @@ -362,7 +362,7 @@ func TestFileWriter(t *testing.T) { require.NoError(t, err) assert.Empty(t, entries, "files opened before first Write") - lazyEv := BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1} + lazyEv := Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1} lazyData, err := json.Marshal(lazyEv) require.NoError(t, err) require.NoError(t, fw.Write(lazyEv, lazyData)) @@ -403,7 +403,7 @@ func TestPipeline(t *testing.T) { go func() { defer wg.Done() for j := 0; j < eventsEach; j++ { - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) } }() } @@ -424,7 +424,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() for i := 0; i < 3; i++ { - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) } ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -442,7 +442,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() before := time.Now().UnixMilli() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: 
SourceCDP}) // Ts == 0 + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}) // Ts == 0 after := time.Now().UnixMilli() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -457,7 +457,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_writes_file", func(t *testing.T) { p, dir := newPipeline(t) - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) data, err := os.ReadFile(filepath.Join(dir, "console.log")) require.NoError(t, err) @@ -472,7 +472,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -488,7 +488,7 @@ func TestPipeline(t *testing.T) { p.Start("test-uuid") reader := p.NewReader() - p.Publish(BrowserEvent{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -506,7 +506,7 @@ func TestPipeline(t *testing.T) { rawData, err := json.Marshal(map[string]string{"payload": largeData}) require.NoError(t, err) - p.Publish(BrowserEvent{ + p.Publish(Event{ Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, @@ -537,16 +537,16 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) ctx, cancel := 
context.WithTimeout(context.Background(), 2*time.Second) defer cancel() ev, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, DetailDefault, ev.DetailLevel) + assert.Equal(t, DetailStandard, ev.DetailLevel) - p.Publish(BrowserEvent{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) ev2, err := reader.Read(ctx) require.NoError(t, err) assert.Equal(t, DetailVerbose, ev2.DetailLevel) diff --git a/server/lib/events/filewriter.go b/server/lib/events/filewriter.go index 87bad172..cab3133a 100644 --- a/server/lib/events/filewriter.go +++ b/server/lib/events/filewriter.go @@ -22,7 +22,7 @@ func NewFileWriter(dir string) *FileWriter { } // Write appends data as a single JSONL line to the per-category log file for ev -func (fw *FileWriter) Write(ev BrowserEvent, data []byte) error { +func (fw *FileWriter) Write(ev Event, data []byte) error { cat := ev.Category if cat == "" { return fmt.Errorf("filewriter: event %q has empty category", ev.Type) diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index c6f93dcb..1c3d31e8 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -36,7 +36,7 @@ func (p *Pipeline) Start(captureSessionID string) { // 2. Apply truncateIfNeeded — must happen before both sinks // 3. Write to FileWriter (durable before in-memory) // 4. 
Publish to RingBuffer (in-memory fan-out) -func (p *Pipeline) Publish(ev BrowserEvent) { +func (p *Pipeline) Publish(ev Event) { p.mu.Lock() defer p.mu.Unlock() @@ -46,7 +46,7 @@ func (p *Pipeline) Publish(ev BrowserEvent) { ev.Ts = time.Now().UnixMilli() } if ev.DetailLevel == "" { - ev.DetailLevel = DetailDefault + ev.DetailLevel = DetailStandard } ev, data := truncateIfNeeded(ev) diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 8d0b48ab..44e54f39 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -10,10 +10,10 @@ import ( // RingBuffer is a fixed-capacity circular buffer with closed-channel broadcast fan-out. // Writers never block regardless of reader count or speed. // Readers track their position by seq value (not ring index) and receive an -// events_dropped synthetic BrowserEvent when they fall behind the oldest retained event. +// events_dropped synthetic Event when they fall behind the oldest retained event. type RingBuffer struct { mu sync.RWMutex - buf []BrowserEvent + buf []Event head int // next write position (mod cap) written uint64 // total ever published (monotonic) notify chan struct{} @@ -21,7 +21,7 @@ type RingBuffer struct { func NewRingBuffer(capacity int) *RingBuffer { return &RingBuffer{ - buf: make([]BrowserEvent, capacity), + buf: make([]Event, capacity), notify: make(chan struct{}), } } @@ -29,7 +29,7 @@ func NewRingBuffer(capacity int) *RingBuffer { // Publish adds an event to the ring buffer, evicting the oldest entry on overflow. 
// Closes the current notify channel (waking all waiting readers) and replaces it // with a new one, outside the lock to avoid blocking under contention -func (rb *RingBuffer) Publish(ev BrowserEvent) { +func (rb *RingBuffer) Publish(ev Event) { rb.mu.Lock() rb.buf[rb.head] = ev rb.head = (rb.head + 1) % len(rb.buf) @@ -50,7 +50,7 @@ func (rb *RingBuffer) oldestSeq() uint64 { // NewReader returns a Reader positioned at publish index 0 // If the ring has already published events, the reader will receive an -// events_dropped BrowserEvent on the first Read call if it has fallen behind +// events_dropped Event on the first Read call if it has fallen behind // the oldest retained event func (rb *RingBuffer) NewReader() *Reader { return &Reader{rb: rb, nextSeq: 0} @@ -59,11 +59,11 @@ func (rb *RingBuffer) NewReader() *Reader { // Reader tracks an independent read position in a RingBuffer. type Reader struct { rb *RingBuffer - nextSeq uint64 // publish index, not BrowserEvent.Seq + nextSeq uint64 // publish index, not Event.Seq } // Read blocks until the next event is available or ctx is cancelled -func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { +func (r *Reader) Read(ctx context.Context) (Event, error) { for { r.rb.mu.RLock() notify := r.rb.notify @@ -75,7 +75,7 @@ func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { r.nextSeq = oldest r.rb.mu.RUnlock() data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return BrowserEvent{Type: "events.dropped", Category: CategorySystem, Source: SourceKernelAPI, Data: data}, nil + return Event{Type: "events.dropped", Category: CategorySystem, Source: SourceKernelAPI, Data: data}, nil } if r.nextSeq < written { @@ -90,7 +90,7 @@ func (r *Reader) Read(ctx context.Context) (BrowserEvent, error) { select { case <-ctx.Done(): - return BrowserEvent{}, ctx.Err() + return Event{}, ctx.Err() case <-notify: // new event available; loop to read it } From b370416b5683a926fd78d51f064f317f33bc8891 Mon 
Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:06:32 +0000 Subject: [PATCH 12/27] refactor: restructure Source as nested object with Kind, Event, Metadata Moves CDP-specific fields (target_id, cdp_session_id, frame_id, parent_frame_id) under source.metadata. Top-level Event schema now contains only stable cross-producer fields. --- server/lib/events/event.go | 23 ++++--- server/lib/events/events_test.go | 111 +++++++++++++++++-------------- server/lib/events/ringbuffer.go | 2 +- 3 files changed, 74 insertions(+), 62 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 3d88369c..358288a7 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -20,15 +20,23 @@ const ( CategorySystem EventCategory = "system" ) -type Source string +type SourceKind string const ( - SourceCDP Source = "cdp" - SourceKernelAPI Source = "kernel_api" - SourceExtension Source = "extension" - SourceLocalProcess Source = "local_process" + KindCDP SourceKind = "cdp" + KindKernelAPI SourceKind = "kernel_api" + KindExtension SourceKind = "extension" + KindLocalProcess SourceKind = "local_process" ) +// Source captures provenance: which producer emitted the event and any +// producer-specific context (e.g. CDP target/session/frame IDs). 
+type Source struct { + Kind SourceKind `json:"kind"` + Event string `json:"event,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + type DetailLevel string const ( @@ -46,12 +54,7 @@ type Event struct { Type string `json:"type"` Category EventCategory `json:"category"` Source Source `json:"source"` - SourceEvent string `json:"source_event,omitempty"` DetailLevel DetailLevel `json:"detail_level"` - TargetID string `json:"target_id,omitempty"` - CDPSessionID string `json:"cdp_session_id,omitempty"` - FrameID string `json:"frame_id,omitempty"` - ParentFrameID string `json:"parent_frame_id,omitempty"` URL string `json:"url,omitempty"` Data json.RawMessage `json:"data,omitempty"` Truncated bool `json:"truncated,omitempty"` diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index c60252a3..0099f154 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -22,15 +22,19 @@ func TestEventSerialization(t *testing.T) { Ts: 1234567890000, Type: "console.log", Category: CategoryConsole, - Source: SourceCDP, - SourceEvent: "Runtime.consoleAPICalled", - DetailLevel: DetailStandard, - TargetID: "target-1", - CDPSessionID: "cdp-session-1", - FrameID: "frame-1", - ParentFrameID: "parent-frame-1", - URL: "https://example.com", - Data: json.RawMessage(`{"message":"hello"}`), + Source: Source{ + Kind: KindCDP, + Event: "Runtime.consoleAPICalled", + Metadata: map[string]string{ + "target_id": "target-1", + "cdp_session_id": "cdp-session-1", + "frame_id": "frame-1", + "parent_frame_id": "parent-frame-1", + }, + }, + DetailLevel: DetailStandard, + URL: "https://example.com", + Data: json.RawMessage(`{"message":"hello"}`), } b, err := json.Marshal(ev) @@ -41,16 +45,19 @@ func TestEventSerialization(t *testing.T) { assert.Equal(t, "console.log", decoded["type"]) assert.Equal(t, "console", decoded["category"]) - assert.Equal(t, "cdp", decoded["source"]) - assert.Equal(t, "Runtime.consoleAPICalled", 
decoded["source_event"]) assert.Equal(t, "standard", decoded["detail_level"]) assert.Equal(t, "test-session-id", decoded["capture_session_id"]) assert.Equal(t, float64(1), decoded["seq"]) - assert.Equal(t, "target-1", decoded["target_id"]) - assert.Equal(t, "cdp-session-1", decoded["cdp_session_id"]) - assert.Equal(t, "frame-1", decoded["frame_id"]) - assert.Equal(t, "parent-frame-1", decoded["parent_frame_id"]) assert.Equal(t, "https://example.com", decoded["url"]) + + src, ok := decoded["source"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "cdp", src["kind"]) + assert.Equal(t, "Runtime.consoleAPICalled", src["event"]) + meta, ok := src["metadata"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "target-1", meta["target_id"]) + assert.Equal(t, "cdp-session-1", meta["cdp_session_id"]) } func TestEventData(t *testing.T) { @@ -61,8 +68,8 @@ func TestEventData(t *testing.T) { Ts: 1000, Type: "page.navigation", Category: CategoryPage, - Source: SourceCDP, - Data: rawData, + Source: Source{Kind: KindCDP}, + Data: rawData, } b, err := json.Marshal(ev) @@ -80,14 +87,14 @@ func TestEventOmitEmpty(t *testing.T) { Ts: 1000, Type: "console.log", Category: CategoryConsole, - Source: SourceCDP, + Source: Source{Kind: KindCDP}, } b, err := json.Marshal(ev) require.NoError(t, err) s := string(b) - assert.NotContains(t, s, `"source_event"`) + assert.NotContains(t, s, `"event"`) assert.Contains(t, s, `"detail_level"`) } @@ -97,9 +104,9 @@ func TestRingBuffer(t *testing.T) { reader := rb.NewReader() events := []Event{ - {Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}, - {Seq: 2, Type: "network.request", Category: CategoryNetwork, Source: SourceCDP}, - {Seq: 3, Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}, + {Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}, + {Seq: 2, Type: "network.request", Category: CategoryNetwork, Source: Source{Kind: KindCDP}}, + {Seq: 3, Type: 
"page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}, } for _, ev := range events { @@ -123,9 +130,9 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { done := make(chan struct{}) go func() { - rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) close(done) }() @@ -143,16 +150,16 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { require.NoError(t, err) assert.Equal(t, "events.dropped", first.Type) assert.Equal(t, CategorySystem, first.Category) - assert.Equal(t, SourceKernelAPI, first.Source) + assert.Equal(t, KindKernelAPI, first.Source.Kind) } func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) reader := rb.NewReader() - rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) - rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -206,7 +213,7 @@ func TestConcurrentPublishRead(t *testing.T) { 
Seq: uint64(i), Type: "console.log", Category: CategoryConsole, - Source: SourceCDP, + Source: Source{Kind: KindCDP}, }) } }() @@ -226,7 +233,7 @@ func TestConcurrentReaders(t *testing.T) { } for i := 0; i < numEvents; i++ { - rb.Publish(Event{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: SourceCDP}) + rb.Publish(Event{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) } var wg sync.WaitGroup @@ -273,13 +280,13 @@ func TestFileWriter(t *testing.T) { file string category string }{ - {Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1}, "console.log", "console"}, - {Event{Type: "network.request", Category: CategoryNetwork, Source: SourceCDP, Seq: 1, Ts: 1}, "network.log", "network"}, - {Event{Type: "liveview.click", Category: CategoryLiveview, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, - {Event{Type: "captcha.solve", Category: CategoryCaptcha, Source: SourceExtension, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, - {Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Seq: 1, Ts: 1}, "page.log", "page"}, - {Event{Type: "input.click", Category: CategoryInteraction, Source: SourceCDP, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, - {Event{Type: "monitor.connected", Category: CategorySystem, Source: SourceKernelAPI, Seq: 1, Ts: 1}, "system.log", "system"}, + {Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "console.log", "console"}, + {Event{Type: "network.request", Category: CategoryNetwork, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "network.log", "network"}, + {Event{Type: "liveview.click", Category: CategoryLiveview, Source: Source{Kind: KindKernelAPI}, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, + {Event{Type: "captcha.solve", Category: CategoryCaptcha, Source: Source{Kind: KindExtension}, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, + {Event{Type: 
"page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "page.log", "page"}, + {Event{Type: "input.click", Category: CategoryInteraction, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, + {Event{Type: "monitor.connected", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Seq: 1, Ts: 1}, "system.log", "system"}, } for _, e := range eventsToFile { @@ -298,7 +305,9 @@ func TestFileWriter(t *testing.T) { var decoded map[string]any require.NoError(t, json.Unmarshal(line, &decoded)) assert.Equal(t, e.category, decoded["category"], "wrong category in %s", e.file) - assert.Equal(t, string(e.ev.Source), decoded["source"], "wrong source in %s", e.file) + srcMap, ok := decoded["source"].(map[string]any) + require.True(t, ok, "source should be an object in %s", e.file) + assert.Equal(t, string(e.ev.Source.Kind), srcMap["kind"], "wrong source kind in %s", e.file) } }) @@ -307,7 +316,7 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - ev := Event{Type: "mystery", Category: "", Source: SourceCDP, Seq: 1, Ts: 1} + ev := Event{Type: "mystery", Category: "", Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1} data, _ := json.Marshal(ev) err := fw.Write(ev, data) require.Error(t, err) @@ -332,7 +341,7 @@ func TestFileWriter(t *testing.T) { Seq: uint64(i*eventsPerGoroutine + j), Type: "console.log", Category: CategoryConsole, - Source: SourceCDP, + Source: Source{Kind: KindCDP}, Ts: 1, } evData, err := json.Marshal(ev) @@ -362,7 +371,7 @@ func TestFileWriter(t *testing.T) { require.NoError(t, err) assert.Empty(t, entries, "files opened before first Write") - lazyEv := Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Seq: 1, Ts: 1} + lazyEv := Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1} lazyData, err := json.Marshal(lazyEv) require.NoError(t, err) require.NoError(t, fw.Write(lazyEv, lazyData)) @@ 
-403,7 +412,7 @@ func TestPipeline(t *testing.T) { go func() { defer wg.Done() for j := 0; j < eventsEach; j++ { - p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}) } }() } @@ -424,7 +433,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() for i := 0; i < 3; i++ { - p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) } ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -442,7 +451,7 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() before := time.Now().UnixMilli() - p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP}) // Ts == 0 + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}) // Ts == 0 after := time.Now().UnixMilli() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -457,7 +466,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_writes_file", func(t *testing.T) { p, dir := newPipeline(t) - p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}) data, err := os.ReadFile(filepath.Join(dir, "console.log")) require.NoError(t, err) @@ -472,7 +481,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -488,7 +497,7 @@ func TestPipeline(t *testing.T) { p.Start("test-uuid") reader 
:= p.NewReader() - p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -509,7 +518,7 @@ func TestPipeline(t *testing.T) { p.Publish(Event{ Type: "page.navigation", Category: CategoryPage, - Source: SourceCDP, + Source: Source{Kind: KindCDP}, Ts: 1, Data: json.RawMessage(rawData), }) @@ -537,7 +546,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) reader := p.NewReader() - p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -546,7 +555,7 @@ func TestPipeline(t *testing.T) { require.NoError(t, err) assert.Equal(t, DetailStandard, ev.DetailLevel) - p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: SourceCDP, Ts: 1, DetailLevel: DetailVerbose}) + p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1, DetailLevel: DetailVerbose}) ev2, err := reader.Read(ctx) require.NoError(t, err) assert.Equal(t, DetailVerbose, ev2.DetailLevel) diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 44e54f39..385be3bf 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -75,7 +75,7 @@ func (r *Reader) Read(ctx context.Context) (Event, error) { r.nextSeq = oldest r.rb.mu.RUnlock() data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return Event{Type: "events.dropped", Category: CategorySystem, Source: SourceKernelAPI, Data: data}, nil + return Event{Type: "events.dropped", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Data: data}, nil } if r.nextSeq < 
written { From 41a7aeeb64fdaf655c73ca296cb37ab5b41e5bbc Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:09:01 +0000 Subject: [PATCH 13/27] refactor: extract Envelope wrapper, move seq and capture_session_id out of Event Event is now purely producer-emitted content. Pipeline-assigned metadata (seq, capture_session_id) lives on the Envelope. truncateIfNeeded operates on the full Envelope. Pipeline type comment now documents lifecycle semantics. --- server/lib/events/event.go | 48 +++--- server/lib/events/events_test.go | 251 +++++++++++++++++-------------- server/lib/events/filewriter.go | 8 +- server/lib/events/pipeline.go | 38 ++--- server/lib/events/ringbuffer.go | 40 +++-- 5 files changed, 201 insertions(+), 184 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 358288a7..9dab2ffc 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -46,34 +46,40 @@ const ( DetailRaw DetailLevel = "raw" ) -// Event is the canonical event structure for the capture pipeline. +// Event is the portable event schema. It contains only producer-emitted content; +// pipeline metadata (seq, capture session) lives on the Envelope. 
type Event struct { - CaptureSessionID string `json:"capture_session_id"` - Seq uint64 `json:"seq"` - Ts int64 `json:"ts"` - Type string `json:"type"` - Category EventCategory `json:"category"` - Source Source `json:"source"` - DetailLevel DetailLevel `json:"detail_level"` - URL string `json:"url,omitempty"` - Data json.RawMessage `json:"data,omitempty"` - Truncated bool `json:"truncated,omitempty"` + Ts int64 `json:"ts"` + Type string `json:"type"` + Category EventCategory `json:"category"` + Source Source `json:"source"` + DetailLevel DetailLevel `json:"detail_level"` + URL string `json:"url,omitempty"` + Data json.RawMessage `json:"data,omitempty"` + Truncated bool `json:"truncated,omitempty"` } -// truncateIfNeeded marshals ev and returns the (possibly truncated) event together -func truncateIfNeeded(ev Event) (Event, []byte) { - data, err := json.Marshal(ev) +// Envelope wraps an Event with pipeline-assigned metadata. +type Envelope struct { + CaptureSessionID string `json:"capture_session_id"` + Seq uint64 `json:"seq"` + Event Event `json:"event"` +} + +// truncateIfNeeded marshals env and returns the (possibly truncated) envelope +func truncateIfNeeded(env Envelope) (Envelope, []byte) { + data, err := json.Marshal(env) if err != nil { - return ev, data + return env, data } if len(data) <= maxS2RecordBytes { - return ev, data + return env, data } - ev.Data = json.RawMessage("null") - ev.Truncated = true - data, err = json.Marshal(ev) + env.Event.Data = json.RawMessage("null") + env.Event.Truncated = true + data, err = json.Marshal(env) if err != nil { - return ev, nil + return env, nil } - return ev, data + return env, data } diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index 0099f154..54fdae5b 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -17,11 +17,9 @@ import ( func TestEventSerialization(t *testing.T) { ev := Event{ - CaptureSessionID: "test-session-id", - Seq: 1, - Ts: 
1234567890000, - Type: "console.log", - Category: CategoryConsole, + Ts: 1234567890000, + Type: "console.log", + Category: CategoryConsole, Source: Source{ Kind: KindCDP, Event: "Runtime.consoleAPICalled", @@ -46,8 +44,6 @@ func TestEventSerialization(t *testing.T) { assert.Equal(t, "console.log", decoded["type"]) assert.Equal(t, "console", decoded["category"]) assert.Equal(t, "standard", decoded["detail_level"]) - assert.Equal(t, "test-session-id", decoded["capture_session_id"]) - assert.Equal(t, float64(1), decoded["seq"]) assert.Equal(t, "https://example.com", decoded["url"]) src, ok := decoded["source"].(map[string]any) @@ -60,14 +56,37 @@ func TestEventSerialization(t *testing.T) { assert.Equal(t, "cdp-session-1", meta["cdp_session_id"]) } +func TestEnvelopeSerialization(t *testing.T) { + env := Envelope{ + CaptureSessionID: "test-session-id", + Seq: 1, + Event: Event{ + Ts: 1000, + Type: "console.log", + Category: CategoryConsole, + Source: Source{Kind: KindCDP}, + }, + } + + b, err := json.Marshal(env) + require.NoError(t, err) + + var decoded map[string]any + require.NoError(t, json.Unmarshal(b, &decoded)) + + assert.Equal(t, "test-session-id", decoded["capture_session_id"]) + assert.Equal(t, float64(1), decoded["seq"]) + inner, ok := decoded["event"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "console.log", inner["type"]) +} + func TestEventData(t *testing.T) { rawData := json.RawMessage(`{"key":"value","num":42}`) ev := Event{ - CaptureSessionID: "test-session", - Seq: 1, - Ts: 1000, - Type: "page.navigation", - Category: CategoryPage, + Ts: 1000, + Type: "page.navigation", + Category: CategoryPage, Source: Source{Kind: KindCDP}, Data: rawData, } @@ -82,11 +101,9 @@ func TestEventData(t *testing.T) { func TestEventOmitEmpty(t *testing.T) { ev := Event{ - CaptureSessionID: "sess", - Seq: 1, - Ts: 1000, - Type: "console.log", - Category: CategoryConsole, + Ts: 1000, + Type: "console.log", + Category: CategoryConsole, Source: Source{Kind: 
KindCDP}, } @@ -98,29 +115,37 @@ func TestEventOmitEmpty(t *testing.T) { assert.Contains(t, s, `"detail_level"`) } -// TestRingBuffer: publish 3 events; reader reads all 3 in order +func mkEnv(seq uint64, ev Event) Envelope { + return Envelope{Seq: seq, Event: ev} +} + +func cdpEvent(typ string, cat EventCategory) Event { + return Event{Type: typ, Category: cat, Source: Source{Kind: KindCDP}} +} + +// TestRingBuffer: publish 3 envelopes; reader reads all 3 in order func TestRingBuffer(t *testing.T) { rb := NewRingBuffer(10) reader := rb.NewReader() - events := []Event{ - {Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}, - {Seq: 2, Type: "network.request", Category: CategoryNetwork, Source: Source{Kind: KindCDP}}, - {Seq: 3, Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}, + envelopes := []Envelope{ + mkEnv(1, cdpEvent("console.log", CategoryConsole)), + mkEnv(2, cdpEvent("network.request", CategoryNetwork)), + mkEnv(3, cdpEvent("page.navigation", CategoryPage)), } - for _, ev := range events { - rb.Publish(ev) + for _, env := range envelopes { + rb.Publish(env) } ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - for i, expected := range events { + for i, expected := range envelopes { got, err := reader.Read(ctx) require.NoError(t, err, "reading event %d", i) - assert.Equal(t, expected.Type, got.Type) - assert.Equal(t, expected.Category, got.Category) + assert.Equal(t, expected.Event.Type, got.Event.Type) + assert.Equal(t, expected.Event.Category, got.Event.Category) } } @@ -130,9 +155,9 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { done := make(chan struct{}) go func() { - rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) - rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) - rb.Publish(Event{Seq: 3, Type: "console.log", Category: 
CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(mkEnv(1, cdpEvent("console.log", CategoryConsole))) + rb.Publish(mkEnv(2, cdpEvent("console.log", CategoryConsole))) + rb.Publish(mkEnv(3, cdpEvent("console.log", CategoryConsole))) close(done) }() @@ -148,32 +173,31 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { first, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events.dropped", first.Type) - assert.Equal(t, CategorySystem, first.Category) - assert.Equal(t, KindKernelAPI, first.Source.Kind) + assert.Equal(t, "events.dropped", first.Event.Type) + assert.Equal(t, CategorySystem, first.Event.Category) + assert.Equal(t, KindKernelAPI, first.Event.Source.Kind) } func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) reader := rb.NewReader() - rb.Publish(Event{Seq: 1, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) - rb.Publish(Event{Seq: 2, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) - rb.Publish(Event{Seq: 3, Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(mkEnv(1, cdpEvent("console.log", CategoryConsole))) + rb.Publish(mkEnv(2, cdpEvent("console.log", CategoryConsole))) + rb.Publish(mkEnv(3, cdpEvent("console.log", CategoryConsole))) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() first, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events.dropped", first.Type) - assert.Equal(t, CategorySystem, first.Category) + assert.Equal(t, "events.dropped", first.Event.Type) + assert.Equal(t, CategorySystem, first.Event.Category) - require.NotNil(t, first.Data) - assert.True(t, json.Valid(first.Data)) - assert.JSONEq(t, `{"dropped":1}`, string(first.Data)) + require.NotNil(t, first.Event.Data) + assert.True(t, json.Valid(first.Event.Data)) + assert.JSONEq(t, `{"dropped":1}`, string(first.Event.Data)) - // After the drop sentinel the reader 
continues with the surviving events second, err := reader.Read(ctx) require.NoError(t, err) assert.Equal(t, uint64(2), second.Seq) @@ -209,12 +233,7 @@ func TestConcurrentPublishRead(t *testing.T) { go func() { defer wg.Done() for i := 1; i <= numEvents; i++ { - rb.Publish(Event{ - Seq: uint64(i), - Type: "console.log", - Category: CategoryConsole, - Source: Source{Kind: KindCDP}, - }) + rb.Publish(mkEnv(uint64(i), cdpEvent("console.log", CategoryConsole))) } }() @@ -233,11 +252,11 @@ func TestConcurrentReaders(t *testing.T) { } for i := 0; i < numEvents; i++ { - rb.Publish(Event{Seq: uint64(i + 1), Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}}) + rb.Publish(mkEnv(uint64(i+1), cdpEvent("console.log", CategoryConsole))) } var wg sync.WaitGroup - results := make([][]Event, numReaders) + results := make([][]Envelope, numReaders) for i, r := range readers { wg.Add(1) @@ -246,24 +265,24 @@ func TestConcurrentReaders(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - var evs []Event + var envs []Envelope for j := 0; j < numEvents; j++ { - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) if !assert.NoError(t, err) { break } - evs = append(evs, ev) + envs = append(envs, env) } - results[idx] = evs + results[idx] = envs }(i, r) } wg.Wait() - for i, evs := range results { - assert.Len(t, evs, numEvents, "reader %d", i) - for j, ev := range evs { - assert.Equal(t, uint64(j+1), ev.Seq, "reader %d event %d", i, j) + for i, envs := range results { + assert.Len(t, envs, numEvents, "reader %d", i) + for j, env := range envs { + assert.Equal(t, uint64(j+1), env.Seq, "reader %d event %d", i, j) } } } @@ -275,39 +294,41 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - eventsToFile := []struct { - ev Event + envsToFile := []struct { + env Envelope file string category string }{ - {Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: 
KindCDP}, Seq: 1, Ts: 1}, "console.log", "console"}, - {Event{Type: "network.request", Category: CategoryNetwork, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "network.log", "network"}, - {Event{Type: "liveview.click", Category: CategoryLiveview, Source: Source{Kind: KindKernelAPI}, Seq: 1, Ts: 1}, "liveview.log", "liveview"}, - {Event{Type: "captcha.solve", Category: CategoryCaptcha, Source: Source{Kind: KindExtension}, Seq: 1, Ts: 1}, "captcha.log", "captcha"}, - {Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "page.log", "page"}, - {Event{Type: "input.click", Category: CategoryInteraction, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1}, "interaction.log", "interaction"}, - {Event{Type: "monitor.connected", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Seq: 1, Ts: 1}, "system.log", "system"}, + {Envelope{Seq: 1, Event: Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}}, "console.log", "console"}, + {Envelope{Seq: 2, Event: Event{Type: "network.request", Category: CategoryNetwork, Source: Source{Kind: KindCDP}, Ts: 1}}, "network.log", "network"}, + {Envelope{Seq: 3, Event: Event{Type: "liveview.click", Category: CategoryLiveview, Source: Source{Kind: KindKernelAPI}, Ts: 1}}, "liveview.log", "liveview"}, + {Envelope{Seq: 4, Event: Event{Type: "captcha.solve", Category: CategoryCaptcha, Source: Source{Kind: KindExtension}, Ts: 1}}, "captcha.log", "captcha"}, + {Envelope{Seq: 5, Event: Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}}, "page.log", "page"}, + {Envelope{Seq: 6, Event: Event{Type: "input.click", Category: CategoryInteraction, Source: Source{Kind: KindCDP}, Ts: 1}}, "interaction.log", "interaction"}, + {Envelope{Seq: 7, Event: Event{Type: "monitor.connected", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Ts: 1}}, "system.log", "system"}, } - for _, e := range eventsToFile { - data, 
err := json.Marshal(e.ev) + for _, e := range envsToFile { + data, err := json.Marshal(e.env) require.NoError(t, err) - require.NoError(t, fw.Write(e.ev, data)) + require.NoError(t, fw.Write(e.env, data)) } - for _, e := range eventsToFile { + for _, e := range envsToFile { data, err := os.ReadFile(filepath.Join(dir, e.file)) - require.NoError(t, err, "missing file %s for type %s", e.file, e.ev.Type) + require.NoError(t, err, "missing file %s for type %s", e.file, e.env.Event.Type) line := bytes.TrimRight(data, "\n") require.True(t, json.Valid(line), "invalid JSON in %s", e.file) var decoded map[string]any require.NoError(t, json.Unmarshal(line, &decoded)) - assert.Equal(t, e.category, decoded["category"], "wrong category in %s", e.file) - srcMap, ok := decoded["source"].(map[string]any) + inner, ok := decoded["event"].(map[string]any) + require.True(t, ok) + assert.Equal(t, e.category, inner["category"], "wrong category in %s", e.file) + srcMap, ok := inner["source"].(map[string]any) require.True(t, ok, "source should be an object in %s", e.file) - assert.Equal(t, string(e.ev.Source.Kind), srcMap["kind"], "wrong source kind in %s", e.file) + assert.Equal(t, string(e.env.Event.Source.Kind), srcMap["kind"], "wrong source kind in %s", e.file) } }) @@ -316,9 +337,9 @@ func TestFileWriter(t *testing.T) { fw := NewFileWriter(dir) defer fw.Close() - ev := Event{Type: "mystery", Category: "", Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1} - data, _ := json.Marshal(ev) - err := fw.Write(ev, data) + env := Envelope{Seq: 1, Event: Event{Type: "mystery", Category: "", Source: Source{Kind: KindCDP}, Ts: 1}} + data, _ := json.Marshal(env) + err := fw.Write(env, data) require.Error(t, err) assert.Contains(t, err.Error(), "empty category") }) @@ -337,16 +358,13 @@ func TestFileWriter(t *testing.T) { go func(i int) { defer wg.Done() for j := 0; j < eventsPerGoroutine; j++ { - ev := Event{ - Seq: uint64(i*eventsPerGoroutine + j), - Type: "console.log", - Category: CategoryConsole, - 
Source: Source{Kind: KindCDP}, - Ts: 1, + env := Envelope{ + Seq: uint64(i*eventsPerGoroutine + j), + Event: Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}, } - evData, err := json.Marshal(ev) + envData, err := json.Marshal(env) require.NoError(t, err) - require.NoError(t, fw.Write(ev, evData)) + require.NoError(t, fw.Write(env, envData)) } }(i) } @@ -371,10 +389,10 @@ func TestFileWriter(t *testing.T) { require.NoError(t, err) assert.Empty(t, entries, "files opened before first Write") - lazyEv := Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Seq: 1, Ts: 1} - lazyData, err := json.Marshal(lazyEv) + env := Envelope{Seq: 1, Event: Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}} + envData, err := json.Marshal(env) require.NoError(t, err) - require.NoError(t, fw.Write(lazyEv, lazyData)) + require.NoError(t, fw.Write(env, envData)) entries, err = os.ReadDir(dir) require.NoError(t, err) @@ -399,7 +417,6 @@ func TestPipeline(t *testing.T) { const eventsEach = 50 const total = goroutines * eventsEach - // Ring must hold all events so no drop sentinels are emitted. 
rb := NewRingBuffer(total) fw := NewFileWriter(t.TempDir()) p := NewPipeline(rb, fw) @@ -412,7 +429,7 @@ func TestPipeline(t *testing.T) { go func() { defer wg.Done() for j := 0; j < eventsEach; j++ { - p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}) + p.Publish(cdpEvent("console.log", CategoryConsole)) } }() } @@ -422,9 +439,9 @@ func TestPipeline(t *testing.T) { defer cancel() for want := uint64(1); want <= total; want++ { - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, want, ev.Seq, "events must arrive in seq order") + assert.Equal(t, want, env.Seq, "events must arrive in seq order") } }) @@ -440,9 +457,9 @@ func TestPipeline(t *testing.T) { defer cancel() for want := uint64(1); want <= 3; want++ { - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, want, ev.Seq, "expected seq %d got %d", want, ev.Seq) + assert.Equal(t, want, env.Seq, "expected seq %d got %d", want, env.Seq) } }) @@ -451,16 +468,16 @@ func TestPipeline(t *testing.T) { reader := p.NewReader() before := time.Now().UnixMilli() - p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}) // Ts == 0 + p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}) after := time.Now().UnixMilli() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.GreaterOrEqual(t, ev.Ts, before) - assert.LessOrEqual(t, ev.Ts, after) + assert.GreaterOrEqual(t, env.Event.Ts, before) + assert.LessOrEqual(t, env.Event.Ts, after) }) t.Run("publish_writes_file", func(t *testing.T) { @@ -486,10 +503,10 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - ev, err := reader.Read(ctx) + env, err := 
reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "page.navigation", ev.Type) - assert.Equal(t, CategoryPage, ev.Category) + assert.Equal(t, "page.navigation", env.Event.Type) + assert.Equal(t, CategoryPage, env.Event.Category) }) t.Run("start_sets_capture_session_id", func(t *testing.T) { @@ -502,9 +519,9 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "test-uuid", ev.CaptureSessionID) + assert.Equal(t, "test-uuid", env.CaptureSessionID) }) t.Run("truncation_applied", func(t *testing.T) { @@ -516,22 +533,22 @@ func TestPipeline(t *testing.T) { require.NoError(t, err) p.Publish(Event{ - Type: "page.navigation", - Category: CategoryPage, - Source: Source{Kind: KindCDP}, - Ts: 1, - Data: json.RawMessage(rawData), + Type: "page.navigation", + Category: CategoryPage, + Source: Source{Kind: KindCDP}, + Ts: 1, + Data: json.RawMessage(rawData), }) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.True(t, ev.Truncated) - assert.True(t, json.Valid(ev.Data)) + assert.True(t, env.Event.Truncated) + assert.True(t, json.Valid(env.Event.Data)) - marshaled, err := json.Marshal(ev) + marshaled, err := json.Marshal(env) require.NoError(t, err) assert.LessOrEqual(t, len(marshaled), maxS2RecordBytes) @@ -551,13 +568,13 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - ev, err := reader.Read(ctx) + env, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, DetailStandard, ev.DetailLevel) + assert.Equal(t, DetailStandard, env.Event.DetailLevel) p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1, DetailLevel: DetailVerbose}) - ev2, err := 
reader.Read(ctx) + env2, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, DetailVerbose, ev2.DetailLevel) + assert.Equal(t, DetailVerbose, env2.Event.DetailLevel) }) } diff --git a/server/lib/events/filewriter.go b/server/lib/events/filewriter.go index cab3133a..6ce5ff5f 100644 --- a/server/lib/events/filewriter.go +++ b/server/lib/events/filewriter.go @@ -21,11 +21,11 @@ func NewFileWriter(dir string) *FileWriter { return &FileWriter{dir: dir, files: make(map[EventCategory]*os.File)} } -// Write appends data as a single JSONL line to the per-category log file for ev -func (fw *FileWriter) Write(ev Event, data []byte) error { - cat := ev.Category +// Write appends data as a single JSONL line to the per-category log file. +func (fw *FileWriter) Write(env Envelope, data []byte) error { + cat := env.Event.Category if cat == "" { - return fmt.Errorf("filewriter: event %q has empty category", ev.Type) + return fmt.Errorf("filewriter: event %q has empty category", env.Event.Type) } fw.mu.Lock() diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index 1c3d31e8..403a1df0 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -7,7 +7,10 @@ import ( "time" ) -// Pipeline glues a RingBuffer and a FileWriter into a single write path +// Pipeline is a single-use write path that wraps events in envelopes and fans +// them out to a FileWriter (durable) and RingBuffer (in-memory). Call Start +// once with a capture session ID, then Publish concurrently. Close flushes the +// FileWriter; there is no restart or terminal event. type Pipeline struct { mu sync.Mutex ring *RingBuffer @@ -23,46 +26,43 @@ func NewPipeline(ring *RingBuffer, files *FileWriter) *Pipeline { return p } -// Start sets the capture session ID that will be stamped on every subsequent -// published event +// Start sets the capture session ID stamped on every subsequent envelope. 
func (p *Pipeline) Start(captureSessionID string) { p.captureSessionID.Store(&captureSessionID) } -// Publish stamps, truncates, files, and broadcasts a single event. -// -// Ordering: -// 1. Stamp CaptureSessionID, Seq, Ts (Ts only if caller left it zero) -// 2. Apply truncateIfNeeded — must happen before both sinks -// 3. Write to FileWriter (durable before in-memory) -// 4. Publish to RingBuffer (in-memory fan-out) +// Publish wraps ev in an Envelope, truncates if needed, then writes to +// FileWriter (durable) before RingBuffer (in-memory fan-out). func (p *Pipeline) Publish(ev Event) { p.mu.Lock() defer p.mu.Unlock() - ev.CaptureSessionID = *p.captureSessionID.Load() - ev.Seq = p.seq.Add(1) if ev.Ts == 0 { ev.Ts = time.Now().UnixMilli() } if ev.DetailLevel == "" { ev.DetailLevel = DetailStandard } - ev, data := truncateIfNeeded(ev) - if err := p.files.Write(ev, data); err != nil { - slog.Error("pipeline: file write failed", "seq", ev.Seq, "category", ev.Category, "err", err) + env := Envelope{ + CaptureSessionID: *p.captureSessionID.Load(), + Seq: p.seq.Add(1), + Event: ev, } - p.ring.Publish(ev) + env, data := truncateIfNeeded(env) + + if err := p.files.Write(env, data); err != nil { + slog.Error("pipeline: file write failed", "seq", env.Seq, "category", env.Event.Category, "err", err) + } + p.ring.Publish(env) } -// NewReader returns a Reader positioned at the start of the ring buffer +// NewReader returns a Reader positioned at the start of the ring buffer. func (p *Pipeline) NewReader() *Reader { return p.ring.NewReader() } -// Close closes the underlying FileWriter, flushing and releasing all open -// file descriptors +// Close flushes and releases all open file descriptors. 
func (p *Pipeline) Close() error { return p.files.Close() } diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 385be3bf..7a2cb522 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -9,11 +9,9 @@ import ( // RingBuffer is a fixed-capacity circular buffer with closed-channel broadcast fan-out. // Writers never block regardless of reader count or speed. -// Readers track their position by seq value (not ring index) and receive an -// events_dropped synthetic Event when they fall behind the oldest retained event. type RingBuffer struct { mu sync.RWMutex - buf []Event + buf []Envelope head int // next write position (mod cap) written uint64 // total ever published (monotonic) notify chan struct{} @@ -21,26 +19,23 @@ type RingBuffer struct { func NewRingBuffer(capacity int) *RingBuffer { return &RingBuffer{ - buf: make([]Event, capacity), + buf: make([]Envelope, capacity), notify: make(chan struct{}), } } -// Publish adds an event to the ring buffer, evicting the oldest entry on overflow. -// Closes the current notify channel (waking all waiting readers) and replaces it -// with a new one, outside the lock to avoid blocking under contention -func (rb *RingBuffer) Publish(ev Event) { +// Publish adds an envelope to the ring, evicting the oldest on overflow. 
+func (rb *RingBuffer) Publish(env Envelope) { rb.mu.Lock() - rb.buf[rb.head] = ev + rb.buf[rb.head] = env rb.head = (rb.head + 1) % len(rb.buf) rb.written++ old := rb.notify rb.notify = make(chan struct{}) rb.mu.Unlock() - close(old) // outside lock to avoid blocking under contention + close(old) } -// oldestSeq returns the seq of the oldest event still in the ring func (rb *RingBuffer) oldestSeq() uint64 { if rb.written <= uint64(len(rb.buf)) { return 0 @@ -48,10 +43,7 @@ func (rb *RingBuffer) oldestSeq() uint64 { return rb.written - uint64(len(rb.buf)) } -// NewReader returns a Reader positioned at publish index 0 -// If the ring has already published events, the reader will receive an -// events_dropped Event on the first Read call if it has fallen behind -// the oldest retained event +// NewReader returns a Reader positioned at publish index 0. func (rb *RingBuffer) NewReader() *Reader { return &Reader{rb: rb, nextSeq: 0} } @@ -59,11 +51,12 @@ func (rb *RingBuffer) NewReader() *Reader { // Reader tracks an independent read position in a RingBuffer. type Reader struct { rb *RingBuffer - nextSeq uint64 // publish index, not Event.Seq + nextSeq uint64 } -// Read blocks until the next event is available or ctx is cancelled -func (r *Reader) Read(ctx context.Context) (Event, error) { +// Read blocks until the next envelope is available or ctx is cancelled. +// When the reader has fallen behind, a synthetic drop event is returned. 
+func (r *Reader) Read(ctx context.Context) (Envelope, error) { for { r.rb.mu.RLock() notify := r.rb.notify @@ -75,24 +68,25 @@ func (r *Reader) Read(ctx context.Context) (Event, error) { r.nextSeq = oldest r.rb.mu.RUnlock() data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return Event{Type: "events.dropped", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Data: data}, nil + return Envelope{ + Event: Event{Type: "events.dropped", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Data: data}, + }, nil } if r.nextSeq < written { idx := int(r.nextSeq % uint64(len(r.rb.buf))) - ev := r.rb.buf[idx] + env := r.rb.buf[idx] r.nextSeq++ r.rb.mu.RUnlock() - return ev, nil + return env, nil } r.rb.mu.RUnlock() select { case <-ctx.Done(): - return Event{}, ctx.Err() + return Envelope{}, ctx.Err() case <-notify: - // new event available; loop to read it } } } From 9f4c808712c040e03f4908188fcabbfe58e93b8f Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:14:41 +0000 Subject: [PATCH 14/27] refactor: unify seq as universal cursor, add NewReader(afterSeq) Ring buffer now indexes by envelope.Seq directly, removing the separate head/written counters. NewReader takes an explicit afterSeq for resume support. Renamed notify to readerWake for clarity. 
--- server/lib/events/events_test.go | 24 ++++++------- server/lib/events/pipeline.go | 4 +-- server/lib/events/ringbuffer.go | 62 +++++++++++++++++++------------- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index 54fdae5b..de957672 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -126,7 +126,7 @@ func cdpEvent(typ string, cat EventCategory) Event { // TestRingBuffer: publish 3 envelopes; reader reads all 3 in order func TestRingBuffer(t *testing.T) { rb := NewRingBuffer(10) - reader := rb.NewReader() + reader := rb.NewReader(0) envelopes := []Envelope{ mkEnv(1, cdpEvent("console.log", CategoryConsole)), @@ -167,7 +167,7 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { t.Fatal("Publish blocked with no readers") } - reader := rb.NewReader() + reader := rb.NewReader(0) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -180,7 +180,7 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { func TestRingBufferOverflowExistingReader(t *testing.T) { rb := NewRingBuffer(2) - reader := rb.NewReader() + reader := rb.NewReader(0) rb.Publish(mkEnv(1, cdpEvent("console.log", CategoryConsole))) rb.Publish(mkEnv(2, cdpEvent("console.log", CategoryConsole))) @@ -214,7 +214,7 @@ func TestConcurrentPublishRead(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - reader := rb.NewReader() + reader := rb.NewReader(0) var wg sync.WaitGroup @@ -248,7 +248,7 @@ func TestConcurrentReaders(t *testing.T) { readers := make([]*Reader, numReaders) for i := range readers { - readers[i] = rb.NewReader() + readers[i] = rb.NewReader(0) } for i := 0; i < numEvents; i++ { @@ -421,7 +421,7 @@ func TestPipeline(t *testing.T) { fw := NewFileWriter(t.TempDir()) p := NewPipeline(rb, fw) t.Cleanup(func() { p.Close() }) - reader := p.NewReader() + reader := p.NewReader(0) var wg 
sync.WaitGroup for i := 0; i < goroutines; i++ { @@ -447,7 +447,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_increments_seq", func(t *testing.T) { p, _ := newPipeline(t) - reader := p.NewReader() + reader := p.NewReader(0) for i := 0; i < 3; i++ { p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) @@ -465,7 +465,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_sets_ts", func(t *testing.T) { p, _ := newPipeline(t) - reader := p.NewReader() + reader := p.NewReader(0) before := time.Now().UnixMilli() p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}}) @@ -497,7 +497,7 @@ func TestPipeline(t *testing.T) { t.Run("publish_writes_ring", func(t *testing.T) { p, _ := newPipeline(t) - reader := p.NewReader() + reader := p.NewReader(0) p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -513,7 +513,7 @@ func TestPipeline(t *testing.T) { p, _ := newPipeline(t) p.Start("test-uuid") - reader := p.NewReader() + reader := p.NewReader(0) p.Publish(Event{Type: "page.navigation", Category: CategoryPage, Source: Source{Kind: KindCDP}, Ts: 1}) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -526,7 +526,7 @@ func TestPipeline(t *testing.T) { t.Run("truncation_applied", func(t *testing.T) { p, dir := newPipeline(t) - reader := p.NewReader() + reader := p.NewReader(0) largeData := strings.Repeat("x", 1_100_000) rawData, err := json.Marshal(map[string]string{"payload": largeData}) @@ -561,7 +561,7 @@ func TestPipeline(t *testing.T) { t.Run("defaults_detail_level", func(t *testing.T) { p, _ := newPipeline(t) - reader := p.NewReader() + reader := p.NewReader(0) p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1}) diff --git a/server/lib/events/pipeline.go 
b/server/lib/events/pipeline.go index 403a1df0..ba7a7660 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -58,8 +58,8 @@ func (p *Pipeline) Publish(ev Event) { } // NewReader returns a Reader positioned at the start of the ring buffer. -func (p *Pipeline) NewReader() *Reader { - return p.ring.NewReader() +func (p *Pipeline) NewReader(afterSeq uint64) *Reader { + return p.ring.NewReader(afterSeq) } // Close flushes and releases all open file descriptors. diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index 7a2cb522..d7e31a41 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -10,42 +10,47 @@ import ( // RingBuffer is a fixed-capacity circular buffer with closed-channel broadcast fan-out. // Writers never block regardless of reader count or speed. type RingBuffer struct { - mu sync.RWMutex - buf []Envelope - head int // next write position (mod cap) - written uint64 // total ever published (monotonic) - notify chan struct{} + mu sync.RWMutex + buf []Envelope + cap uint64 + latestSeq uint64 // highest envelope.Seq published + readerWake chan struct{} // closed-and-replaced on each Publish to wake blocked readers } func NewRingBuffer(capacity int) *RingBuffer { return &RingBuffer{ - buf: make([]Envelope, capacity), - notify: make(chan struct{}), + buf: make([]Envelope, capacity), + cap: uint64(capacity), + readerWake: make(chan struct{}), } } // Publish adds an envelope to the ring, evicting the oldest on overflow. 
func (rb *RingBuffer) Publish(env Envelope) { rb.mu.Lock() - rb.buf[rb.head] = env - rb.head = (rb.head + 1) % len(rb.buf) - rb.written++ - old := rb.notify - rb.notify = make(chan struct{}) + rb.buf[env.Seq%rb.cap] = env + rb.latestSeq = env.Seq + old := rb.readerWake + rb.readerWake = make(chan struct{}) rb.mu.Unlock() close(old) } func (rb *RingBuffer) oldestSeq() uint64 { - if rb.written <= uint64(len(rb.buf)) { - return 0 + if rb.latestSeq <= rb.cap { + return 1 } - return rb.written - uint64(len(rb.buf)) + return rb.latestSeq - rb.cap + 1 } -// NewReader returns a Reader positioned at publish index 0. -func (rb *RingBuffer) NewReader() *Reader { - return &Reader{rb: rb, nextSeq: 0} +// NewReader returns a Reader. afterSeq == 0 starts from the oldest available +// envelope; afterSeq > 0 resumes after that seq. +func (rb *RingBuffer) NewReader(afterSeq uint64) *Reader { + nextSeq := afterSeq + 1 + if afterSeq == 0 { + nextSeq = 1 + } + return &Reader{rb: rb, nextSeq: nextSeq} } // Reader tracks an independent read position in a RingBuffer. 
@@ -59,9 +64,19 @@ type Reader struct { func (r *Reader) Read(ctx context.Context) (Envelope, error) { for { r.rb.mu.RLock() - notify := r.rb.notify + wake := r.rb.readerWake + latest := r.rb.latestSeq oldest := r.rb.oldestSeq() - written := r.rb.written + + if latest == 0 { + r.rb.mu.RUnlock() + select { + case <-ctx.Done(): + return Envelope{}, ctx.Err() + case <-wake: + continue + } + } if r.nextSeq < oldest { dropped := oldest - r.nextSeq @@ -73,9 +88,8 @@ func (r *Reader) Read(ctx context.Context) (Envelope, error) { }, nil } - if r.nextSeq < written { - idx := int(r.nextSeq % uint64(len(r.rb.buf))) - env := r.rb.buf[idx] + if r.nextSeq <= latest { + env := r.rb.buf[r.nextSeq%r.rb.cap] r.nextSeq++ r.rb.mu.RUnlock() return env, nil @@ -86,7 +100,7 @@ func (r *Reader) Read(ctx context.Context) (Envelope, error) { select { case <-ctx.Done(): return Envelope{}, ctx.Err() - case <-notify: + case <-wake: } } } From 6c82459c969fa2b54178d6d1b695deab272a1a2c Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:16:20 +0000 Subject: [PATCH 15/27] refactor: return ReadResult instead of synthetic drop events Drops are now stream metadata (ReadResult.Dropped) rather than fake events smuggled into the Event schema. Transport layer decides how to surface gaps on the wire. --- server/lib/events/events_test.go | 70 ++++++++++++++------------------ server/lib/events/ringbuffer.go | 34 ++++++++-------- 2 files changed, 49 insertions(+), 55 deletions(-) diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index de957672..38a2eb4c 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -15,6 +15,15 @@ import ( "github.com/stretchr/testify/require" ) +// readEnvelope is a test helper that calls Read and asserts a non-drop result. 
+func readEnvelope(t *testing.T, r *Reader, ctx context.Context) Envelope { + t.Helper() + res, err := r.Read(ctx) + require.NoError(t, err) + require.NotNil(t, res.Envelope, "expected envelope, got drop") + return *res.Envelope +} + func TestEventSerialization(t *testing.T) { ev := Event{ Ts: 1234567890000, @@ -142,10 +151,9 @@ func TestRingBuffer(t *testing.T) { defer cancel() for i, expected := range envelopes { - got, err := reader.Read(ctx) - require.NoError(t, err, "reading event %d", i) - assert.Equal(t, expected.Event.Type, got.Event.Type) - assert.Equal(t, expected.Event.Category, got.Event.Category) + got := readEnvelope(t, reader, ctx) + assert.Equal(t, expected.Event.Type, got.Event.Type, "event %d", i) + assert.Equal(t, expected.Event.Category, got.Event.Category, "event %d", i) } } @@ -171,11 +179,10 @@ func TestRingBufferOverflowNoBlock(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - first, err := reader.Read(ctx) + res, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events.dropped", first.Event.Type) - assert.Equal(t, CategorySystem, first.Event.Category) - assert.Equal(t, KindKernelAPI, first.Event.Source.Kind) + assert.Nil(t, res.Envelope, "expected drop, not envelope") + assert.True(t, res.Dropped > 0) } func TestRingBufferOverflowExistingReader(t *testing.T) { @@ -189,21 +196,17 @@ func TestRingBufferOverflowExistingReader(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - first, err := reader.Read(ctx) + // First read should be a drop notification + res, err := reader.Read(ctx) require.NoError(t, err) - assert.Equal(t, "events.dropped", first.Event.Type) - assert.Equal(t, CategorySystem, first.Event.Category) - - require.NotNil(t, first.Event.Data) - assert.True(t, json.Valid(first.Event.Data)) - assert.JSONEq(t, `{"dropped":1}`, string(first.Event.Data)) + assert.Nil(t, res.Envelope) + assert.Equal(t, uint64(1), 
res.Dropped) - second, err := reader.Read(ctx) - require.NoError(t, err) + // After the drop the reader continues with the surviving envelopes + second := readEnvelope(t, reader, ctx) assert.Equal(t, uint64(2), second.Seq) - third, err := reader.Read(ctx) - require.NoError(t, err) + third := readEnvelope(t, reader, ctx) assert.Equal(t, uint64(3), third.Seq) } @@ -267,10 +270,7 @@ func TestConcurrentReaders(t *testing.T) { var envs []Envelope for j := 0; j < numEvents; j++ { - env, err := reader.Read(ctx) - if !assert.NoError(t, err) { - break - } + env := readEnvelope(t, reader, ctx) envs = append(envs, env) } results[idx] = envs @@ -439,8 +439,7 @@ func TestPipeline(t *testing.T) { defer cancel() for want := uint64(1); want <= total; want++ { - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.Equal(t, want, env.Seq, "events must arrive in seq order") } }) @@ -457,8 +456,7 @@ func TestPipeline(t *testing.T) { defer cancel() for want := uint64(1); want <= 3; want++ { - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.Equal(t, want, env.Seq, "expected seq %d got %d", want, env.Seq) } }) @@ -474,8 +472,7 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.GreaterOrEqual(t, env.Event.Ts, before) assert.LessOrEqual(t, env.Event.Ts, after) }) @@ -503,8 +500,7 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.Equal(t, "page.navigation", env.Event.Type) assert.Equal(t, CategoryPage, env.Event.Category) }) @@ -519,8 +515,7 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 
defer cancel() - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.Equal(t, "test-uuid", env.CaptureSessionID) }) @@ -543,8 +538,7 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.True(t, env.Event.Truncated) assert.True(t, json.Valid(env.Event.Data)) @@ -568,13 +562,11 @@ func TestPipeline(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - env, err := reader.Read(ctx) - require.NoError(t, err) + env := readEnvelope(t, reader, ctx) assert.Equal(t, DetailStandard, env.Event.DetailLevel) p.Publish(Event{Type: "console.log", Category: CategoryConsole, Source: Source{Kind: KindCDP}, Ts: 1, DetailLevel: DetailVerbose}) - env2, err := reader.Read(ctx) - require.NoError(t, err) + env2 := readEnvelope(t, reader, ctx) assert.Equal(t, DetailVerbose, env2.Event.DetailLevel) }) } diff --git a/server/lib/events/ringbuffer.go b/server/lib/events/ringbuffer.go index d7e31a41..41659e94 100644 --- a/server/lib/events/ringbuffer.go +++ b/server/lib/events/ringbuffer.go @@ -2,19 +2,17 @@ package events import ( "context" - "encoding/json" - "fmt" "sync" ) // RingBuffer is a fixed-capacity circular buffer with closed-channel broadcast fan-out. // Writers never block regardless of reader count or speed. 
type RingBuffer struct { - mu sync.RWMutex - buf []Envelope - cap uint64 - latestSeq uint64 // highest envelope.Seq published - readerWake chan struct{} // closed-and-replaced on each Publish to wake blocked readers + mu sync.RWMutex + buf []Envelope + cap uint64 + latestSeq uint64 // highest envelope.Seq published + readerWake chan struct{} // closed-and-replaced on each Publish to wake blocked readers } func NewRingBuffer(capacity int) *RingBuffer { @@ -53,6 +51,14 @@ func (rb *RingBuffer) NewReader(afterSeq uint64) *Reader { return &Reader{rb: rb, nextSeq: nextSeq} } +// ReadResult is returned by Reader.Read. Exactly one of Envelope or Dropped is +// set: Envelope is non-nil for a normal read, Dropped is non-zero when the +// reader fell behind and events were lost. +type ReadResult struct { + Envelope *Envelope + Dropped uint64 +} + // Reader tracks an independent read position in a RingBuffer. type Reader struct { rb *RingBuffer @@ -60,8 +66,7 @@ type Reader struct { } // Read blocks until the next envelope is available or ctx is cancelled. -// When the reader has fallen behind, a synthetic drop event is returned. 
-func (r *Reader) Read(ctx context.Context) (Envelope, error) { +func (r *Reader) Read(ctx context.Context) (ReadResult, error) { for { r.rb.mu.RLock() wake := r.rb.readerWake @@ -72,7 +77,7 @@ func (r *Reader) Read(ctx context.Context) (Envelope, error) { r.rb.mu.RUnlock() select { case <-ctx.Done(): - return Envelope{}, ctx.Err() + return ReadResult{}, ctx.Err() case <-wake: continue } @@ -82,24 +87,21 @@ func (r *Reader) Read(ctx context.Context) (Envelope, error) { dropped := oldest - r.nextSeq r.nextSeq = oldest r.rb.mu.RUnlock() - data := json.RawMessage(fmt.Sprintf(`{"dropped":%d}`, dropped)) - return Envelope{ - Event: Event{Type: "events.dropped", Category: CategorySystem, Source: Source{Kind: KindKernelAPI}, Data: data}, - }, nil + return ReadResult{Dropped: dropped}, nil } if r.nextSeq <= latest { env := r.rb.buf[r.nextSeq%r.rb.cap] r.nextSeq++ r.rb.mu.RUnlock() - return env, nil + return ReadResult{Envelope: &env}, nil } r.rb.mu.RUnlock() select { case <-ctx.Done(): - return Envelope{}, ctx.Err() + return ReadResult{}, ctx.Err() case <-wake: } } From 6506ed7f82e00bdc3408b90f5112e0995f973633 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Tue, 31 Mar 2026 20:45:26 +0000 Subject: [PATCH 16/27] test: add NewReader resume tests for mid-stream, at-latest, and evicted cases --- server/lib/events/events_test.go | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/server/lib/events/events_test.go b/server/lib/events/events_test.go index 38a2eb4c..b9da4634 100644 --- a/server/lib/events/events_test.go +++ b/server/lib/events/events_test.go @@ -210,6 +210,47 @@ func TestRingBufferOverflowExistingReader(t *testing.T) { assert.Equal(t, uint64(3), third.Seq) } +func TestNewReaderResume(t *testing.T) { + rb := NewRingBuffer(10) + for i := uint64(1); i <= 5; i++ { + rb.Publish(mkEnv(i, cdpEvent("console.log", CategoryConsole))) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + 
t.Run("resume_mid_stream", func(t *testing.T) { + reader := rb.NewReader(3) + env := readEnvelope(t, reader, ctx) + assert.Equal(t, uint64(4), env.Seq) + }) + + t.Run("resume_at_latest", func(t *testing.T) { + reader := rb.NewReader(5) + // Nothing to read — should block until ctx cancels + shortCtx, cancel := context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + _, err := reader.Read(shortCtx) + assert.ErrorIs(t, err, context.DeadlineExceeded) + }) + + t.Run("resume_before_oldest_triggers_drop", func(t *testing.T) { + small := NewRingBuffer(3) + for i := uint64(1); i <= 5; i++ { + small.Publish(mkEnv(i, cdpEvent("console.log", CategoryConsole))) + } + // oldest in ring is seq 3, requesting resume after seq 1 + reader := small.NewReader(1) + res, err := reader.Read(ctx) + require.NoError(t, err) + assert.Nil(t, res.Envelope) + assert.Equal(t, uint64(1), res.Dropped) + + env := readEnvelope(t, reader, ctx) + assert.Equal(t, uint64(3), env.Seq) + }) +} + func TestConcurrentPublishRead(t *testing.T) { const numEvents = 20 rb := NewRingBuffer(32) From 8ecd49206726598ca30acf528b7ca2304903d50e Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 11:51:39 +0000 Subject: [PATCH 17/27] review: fmt --- server/lib/events/event.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 9dab2ffc..5ddb86ce 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -40,10 +40,10 @@ type Source struct { type DetailLevel string const ( - DetailMinimal DetailLevel = "minimal" + DetailMinimal DetailLevel = "minimal" DetailStandard DetailLevel = "standard" - DetailVerbose DetailLevel = "verbose" - DetailRaw DetailLevel = "raw" + DetailVerbose DetailLevel = "verbose" + DetailRaw DetailLevel = "raw" ) // Event is the portable event schema. 
It contains only producer-emitted content; From e572e7bab87278ff43dc045fadded6f629cf04ce Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 11:59:16 +0000 Subject: [PATCH 18/27] fix: guard against nil marshal data and oversized non-data envelopes truncateIfNeeded now warns if the envelope still exceeds the 1MB limit after nulling data (e.g. huge url or source.metadata). Pipeline.Publish skips the file write when marshal returns nil to avoid writing corrupt bare-newline JSONL lines. --- server/lib/events/event.go | 10 ++++++++-- server/lib/events/pipeline.go | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index 5ddb86ce..d464480f 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -2,6 +2,7 @@ package events import ( "encoding/json" + "log/slog" ) // maxS2RecordBytes is the maximum record size for the S2 event pipeline (1 MB). @@ -66,11 +67,13 @@ type Envelope struct { Event Event `json:"event"` } -// truncateIfNeeded marshals env and returns the (possibly truncated) envelope +// truncateIfNeeded marshals env and returns the (possibly truncated) envelope. +// If the envelope still exceeds maxS2RecordBytes after nulling data (e.g. huge +// url or source.metadata), it is returned as-is — callers must handle nil data. 
func truncateIfNeeded(env Envelope) (Envelope, []byte) { data, err := json.Marshal(env) if err != nil { - return env, data + return env, nil } if len(data) <= maxS2RecordBytes { return env, data @@ -81,5 +84,8 @@ func truncateIfNeeded(env Envelope) (Envelope, []byte) { if err != nil { return env, nil } + if len(data) > maxS2RecordBytes { + slog.Warn("truncateIfNeeded: envelope exceeds limit even without data", "seq", env.Seq, "size", len(data)) + } return env, data } diff --git a/server/lib/events/pipeline.go b/server/lib/events/pipeline.go index ba7a7660..e69c254f 100644 --- a/server/lib/events/pipeline.go +++ b/server/lib/events/pipeline.go @@ -51,7 +51,9 @@ func (p *Pipeline) Publish(ev Event) { } env, data := truncateIfNeeded(env) - if err := p.files.Write(env, data); err != nil { + if data == nil { + slog.Error("pipeline: marshal failed, skipping file write", "seq", env.Seq, "category", env.Event.Category) + } else if err := p.files.Write(env, data); err != nil { slog.Error("pipeline: file write failed", "seq", env.Seq, "category", env.Event.Category, "err", err) } p.ring.Publish(env) From 2719a32ef354f5e71395d9a4e5bbc783245d7e05 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:12:53 +0000 Subject: [PATCH 19/27] feat: add CategoryFor helper to derive event category from type string --- server/lib/events/event.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/server/lib/events/event.go b/server/lib/events/event.go index d464480f..185d6809 100644 --- a/server/lib/events/event.go +++ b/server/lib/events/event.go @@ -3,6 +3,7 @@ package events import ( "encoding/json" "log/slog" + "strings" ) // maxS2RecordBytes is the maximum record size for the S2 event pipeline (1 MB). @@ -67,6 +68,26 @@ type Envelope struct { Event Event `json:"event"` } +// CategoryFor derives an EventCategory from an event type string. +// It splits on the first underscore and maps the prefix to a category. 
+func CategoryFor(eventType string) EventCategory { + prefix, _, _ := strings.Cut(eventType, "_") + switch prefix { + case "console": + return CategoryConsole + case "network": + return CategoryNetwork + case "page", "navigation", "dom", "target": + return CategoryPage + case "interaction", "layout", "scroll": + return CategoryInteraction + case "screenshot", "monitor": + return CategorySystem + default: + return CategorySystem + } +} + // truncateIfNeeded marshals env and returns the (possibly truncated) envelope. // If the envelope still exceeds maxS2RecordBytes after nulling data (e.g. huge // url or source.metadata), it is returned as-is — callers must handle nil data. From 46884b8a3769a2d091b0ba644a28da8a5dc5f3b3 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:12:59 +0000 Subject: [PATCH 20/27] feat: add cdpmonitor stub with start/stop lifecycle --- server/lib/cdpmonitor/monitor.go | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 server/lib/cdpmonitor/monitor.go diff --git a/server/lib/cdpmonitor/monitor.go b/server/lib/cdpmonitor/monitor.go new file mode 100644 index 00000000..737f9650 --- /dev/null +++ b/server/lib/cdpmonitor/monitor.go @@ -0,0 +1,41 @@ +package cdpmonitor + +import ( + "context" + "sync/atomic" + + "github.com/onkernel/kernel-images/server/lib/events" +) + +// UpstreamProvider abstracts *devtoolsproxy.UpstreamManager for testability. +type UpstreamProvider interface { + Current() string + Subscribe() (<-chan string, func()) +} + +// PublishFunc publishes an Event to the pipeline. +type PublishFunc func(ev events.Event) + +// Monitor manages a CDP WebSocket connection with auto-attach session fan-out. +// Single-use per capture session: call Start to begin, Stop to tear down. +type Monitor struct { + running atomic.Bool +} + +// New creates a Monitor. displayNum is the X display for ffmpeg screenshots. 
+func New(_ UpstreamProvider, _ PublishFunc, _ int) *Monitor { + return &Monitor{} +} + +// IsRunning reports whether the monitor is actively capturing. +func (m *Monitor) IsRunning() bool { + return m.running.Load() +} + +// Start begins CDP capture. Restarts if already running. +func (m *Monitor) Start(_ context.Context) error { + return nil +} + +// Stop tears down the monitor. Safe to call multiple times. +func (m *Monitor) Stop() {} From 1e544a71690db98e91ec10593e0185d6f30969b2 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:13:06 +0000 Subject: [PATCH 21/27] feat: wire events pipeline and CDP monitor into API service --- server/cmd/api/api/api.go | 27 +++++++++++++++++++++++- server/cmd/api/api/api_test.go | 29 +++++++++++++++---------- server/cmd/api/api/display_test.go | 2 +- server/cmd/api/api/events.go | 34 ++++++++++++++++++++++++++++++ server/cmd/api/main.go | 12 +++++++++++ 5 files changed, 91 insertions(+), 13 deletions(-) create mode 100644 server/cmd/api/api/events.go diff --git a/server/cmd/api/api/api.go b/server/cmd/api/api/api.go index 122ae4d3..936b1b60 100644 --- a/server/cmd/api/api/api.go +++ b/server/cmd/api/api/api.go @@ -9,7 +9,9 @@ import ( "sync" "time" + "github.com/onkernel/kernel-images/server/lib/cdpmonitor" "github.com/onkernel/kernel-images/server/lib/devtoolsproxy" + "github.com/onkernel/kernel-images/server/lib/events" "github.com/onkernel/kernel-images/server/lib/logger" "github.com/onkernel/kernel-images/server/lib/nekoclient" oapi "github.com/onkernel/kernel-images/server/lib/oapi" @@ -68,11 +70,24 @@ type ApiService struct { // xvfbResizeMu serializes background Xvfb restarts to prevent races // when multiple CDP fast-path resizes fire in quick succession. xvfbResizeMu sync.Mutex + + // CDP event pipeline and cdpMonitor. 
+ eventsPipeline *events.Pipeline + cdpMonitor *cdpmonitor.Monitor + monitorMu sync.Mutex } var _ oapi.StrictServerInterface = (*ApiService)(nil) -func New(recordManager recorder.RecordManager, factory recorder.FFmpegRecorderFactory, upstreamMgr *devtoolsproxy.UpstreamManager, stz scaletozero.Controller, nekoAuthClient *nekoclient.AuthClient) (*ApiService, error) { +func New( + recordManager recorder.RecordManager, + factory recorder.FFmpegRecorderFactory, + upstreamMgr *devtoolsproxy.UpstreamManager, + stz scaletozero.Controller, + nekoAuthClient *nekoclient.AuthClient, + eventsPipeline *events.Pipeline, + displayNum int, +) (*ApiService, error) { switch { case recordManager == nil: return nil, fmt.Errorf("recordManager cannot be nil") @@ -82,8 +97,12 @@ func New(recordManager recorder.RecordManager, factory recorder.FFmpegRecorderFa return nil, fmt.Errorf("upstreamMgr cannot be nil") case nekoAuthClient == nil: return nil, fmt.Errorf("nekoAuthClient cannot be nil") + case eventsPipeline == nil: + return nil, fmt.Errorf("eventsPipeline cannot be nil") } + mon := cdpmonitor.New(upstreamMgr, eventsPipeline.Publish, displayNum) + return &ApiService{ recordManager: recordManager, factory: factory, @@ -94,6 +113,8 @@ func New(recordManager recorder.RecordManager, factory recorder.FFmpegRecorderFa stz: stz, nekoAuthClient: nekoAuthClient, policy: &policy.Policy{}, + eventsPipeline: eventsPipeline, + cdpMonitor: mon, }, nil } @@ -313,5 +334,9 @@ func (s *ApiService) ListRecorders(ctx context.Context, _ oapi.ListRecordersRequ } func (s *ApiService) Shutdown(ctx context.Context) error { + s.monitorMu.Lock() + s.cdpMonitor.Stop() + _ = s.eventsPipeline.Close() + s.monitorMu.Unlock() return s.recordManager.StopAll(ctx) } diff --git a/server/cmd/api/api/api_test.go b/server/cmd/api/api/api_test.go index dc192e30..7c47f08f 100644 --- a/server/cmd/api/api/api_test.go +++ b/server/cmd/api/api/api_test.go @@ -12,6 +12,7 @@ import ( "log/slog" 
"github.com/onkernel/kernel-images/server/lib/devtoolsproxy" + "github.com/onkernel/kernel-images/server/lib/events" "github.com/onkernel/kernel-images/server/lib/nekoclient" oapi "github.com/onkernel/kernel-images/server/lib/oapi" "github.com/onkernel/kernel-images/server/lib/recorder" @@ -25,7 +26,7 @@ func TestApiService_StartRecording(t *testing.T) { t.Run("success", func(t *testing.T) { mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.StartRecording(ctx, oapi.StartRecordingRequestObject{}) @@ -39,7 +40,7 @@ func TestApiService_StartRecording(t *testing.T) { t.Run("already recording", func(t *testing.T) { mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) // First start should succeed @@ -54,7 +55,7 @@ func TestApiService_StartRecording(t *testing.T) { t.Run("custom ids don't collide", func(t *testing.T) { mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) for i := 0; i < 5; i++ { @@ -87,7 +88,7 @@ func TestApiService_StopRecording(t *testing.T) { t.Run("no active recording", func(t *testing.T) { mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), 
newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.StopRecording(ctx, oapi.StopRecordingRequestObject{}) @@ -100,7 +101,7 @@ func TestApiService_StopRecording(t *testing.T) { rec := &mockRecorder{id: "default", isRecordingFlag: true} require.NoError(t, mgr.RegisterRecorder(ctx, rec), "failed to register recorder") - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.StopRecording(ctx, oapi.StopRecordingRequestObject{}) require.NoError(t, err) @@ -115,7 +116,7 @@ func TestApiService_StopRecording(t *testing.T) { force := true req := oapi.StopRecordingRequestObject{Body: &oapi.StopRecordingJSONRequestBody{ForceStop: &force}} - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.StopRecording(ctx, req) require.NoError(t, err) @@ -129,7 +130,7 @@ func TestApiService_DownloadRecording(t *testing.T) { t.Run("not found", func(t *testing.T) { mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.DownloadRecording(ctx, oapi.DownloadRecordingRequestObject{}) require.NoError(t, err) @@ -149,7 +150,7 @@ func 
TestApiService_DownloadRecording(t *testing.T) { rec := &mockRecorder{id: "default", isRecordingFlag: true, recordingData: randomBytes(minRecordingSizeInBytes - 1)} require.NoError(t, mgr.RegisterRecorder(ctx, rec), "failed to register recorder") - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) // will return a 202 when the recording is too small resp, err := svc.DownloadRecording(ctx, oapi.DownloadRecordingRequestObject{}) @@ -179,7 +180,7 @@ func TestApiService_DownloadRecording(t *testing.T) { rec := &mockRecorder{id: "default", recordingData: data} require.NoError(t, mgr.RegisterRecorder(ctx, rec), "failed to register recorder") - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) resp, err := svc.DownloadRecording(ctx, oapi.DownloadRecordingRequestObject{}) require.NoError(t, err) @@ -199,7 +200,7 @@ func TestApiService_Shutdown(t *testing.T) { rec := &mockRecorder{id: "default", isRecordingFlag: true} require.NoError(t, mgr.RegisterRecorder(ctx, rec), "failed to register recorder") - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) require.NoError(t, svc.Shutdown(ctx)) @@ -303,10 +304,16 @@ func newMockNekoClient(t *testing.T) *nekoclient.AuthClient { return client } +func newEventsPipeline() *events.Pipeline { + ring := events.NewRingBuffer(64) + fw 
:= events.NewFileWriter(os.TempDir()) + return events.NewPipeline(ring, fw) +} + func TestApiService_PatchChromiumFlags(t *testing.T) { ctx := context.Background() mgr := recorder.NewFFmpegManager() - svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, newMockFactory(), newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) // Test with valid flags diff --git a/server/cmd/api/api/display_test.go b/server/cmd/api/api/display_test.go index acbe74c2..b63addaa 100644 --- a/server/cmd/api/api/display_test.go +++ b/server/cmd/api/api/display_test.go @@ -34,7 +34,7 @@ func testFFmpegFactory(t *testing.T, tempDir string) recorder.FFmpegRecorderFact func newTestServiceWithFactory(t *testing.T, mgr recorder.RecordManager, factory recorder.FFmpegRecorderFactory) *ApiService { t.Helper() - svc, err := New(mgr, factory, newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t)) + svc, err := New(mgr, factory, newTestUpstreamManager(), scaletozero.NewNoopController(), newMockNekoClient(t), newEventsPipeline(), 0) require.NoError(t, err) return svc } diff --git a/server/cmd/api/api/events.go b/server/cmd/api/api/events.go new file mode 100644 index 00000000..6ab875b4 --- /dev/null +++ b/server/cmd/api/api/events.go @@ -0,0 +1,34 @@ +package api + +import ( + "net/http" + + "github.com/google/uuid" + "github.com/onkernel/kernel-images/server/lib/logger" +) + +// StartCapture handles POST /events/start. +// Generates a new capture session ID, seeds the pipeline, then starts the +// CDP monitor. 
If already running, the monitor is stopped and +// restarted with a fresh session ID +func (s *ApiService) StartCapture(w http.ResponseWriter, r *http.Request) { + s.monitorMu.Lock() + defer s.monitorMu.Unlock() + + s.eventsPipeline.Start(uuid.New().String()) + + if err := s.cdpMonitor.Start(r.Context()); err != nil { + logger.FromContext(r.Context()).Error("failed to start CDP monitor", "err", err) + http.Error(w, "failed to start capture", http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) +} + +// StopCapture handles POST /events/stop +func (s *ApiService) StopCapture(w http.ResponseWriter, r *http.Request) { + s.monitorMu.Lock() + defer s.monitorMu.Unlock() + s.cdpMonitor.Stop() + w.WriteHeader(http.StatusOK) +} diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go index c80ddd27..f277d4ce 100644 --- a/server/cmd/api/main.go +++ b/server/cmd/api/main.go @@ -24,6 +24,7 @@ import ( "github.com/onkernel/kernel-images/server/cmd/config" "github.com/onkernel/kernel-images/server/lib/chromedriverproxy" "github.com/onkernel/kernel-images/server/lib/devtoolsproxy" + "github.com/onkernel/kernel-images/server/lib/events" "github.com/onkernel/kernel-images/server/lib/logger" "github.com/onkernel/kernel-images/server/lib/nekoclient" oapi "github.com/onkernel/kernel-images/server/lib/oapi" @@ -90,12 +91,19 @@ func main() { os.Exit(1) } + // Construct events pipeline + eventsRing := events.NewRingBuffer(1024) + eventsFileWriter := events.NewFileWriter("/var/log") + eventsPipeline := events.NewPipeline(eventsRing, eventsFileWriter) + apiService, err := api.New( recorder.NewFFmpegManager(), recorder.NewFFmpegRecorderFactory(config.PathToFFmpeg, defaultParams, stz), upstreamMgr, stz, nekoAuthClient, + eventsPipeline, + config.DisplayNum, ) if err != nil { slogger.Error("failed to create api service", "err", err) @@ -120,6 +128,10 @@ func main() { w.Header().Set("Content-Type", "application/json") w.Write(jsonData) }) + // capture events + 
r.Post("/events/start", apiService.StartCapture) + r.Post("/events/stop", apiService.StopCapture) + // PTY attach endpoint (WebSocket) - not part of OpenAPI spec // Uses WebSocket for bidirectional streaming, which works well through proxies. r.Get("/process/{process_id}/attach", func(w http.ResponseWriter, r *http.Request) { From ebf93741fdc206663b41a5efe97381e7210c010c Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:25:19 +0000 Subject: [PATCH 22/27] feat: add CDP protocol message types and internal state structs --- server/lib/cdpmonitor/types.go | 113 +++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 server/lib/cdpmonitor/types.go diff --git a/server/lib/cdpmonitor/types.go b/server/lib/cdpmonitor/types.go new file mode 100644 index 00000000..f53e733b --- /dev/null +++ b/server/lib/cdpmonitor/types.go @@ -0,0 +1,113 @@ +package cdpmonitor + +import ( + "encoding/json" + "fmt" +) + +// targetInfo holds metadata about an attached CDP target/session. +type targetInfo struct { + targetID string + url string + targetType string +} + +// cdpError is the JSON-RPC error object returned by Chrome. +type cdpError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +func (e *cdpError) Error() string { + return fmt.Sprintf("CDP error %d: %s", e.Code, e.Message) +} + +// cdpMessage is the JSON-RPC message envelope used by Chrome's DevTools Protocol. +type cdpMessage struct { + ID int64 `json:"id,omitempty"` + Method string `json:"method,omitempty"` + Params json.RawMessage `json:"params,omitempty"` + SessionID string `json:"sessionId,omitempty"` + Result json.RawMessage `json:"result,omitempty"` + Error *cdpError `json:"error,omitempty"` +} + +// networkReqState holds request + response metadata until loadingFinished. 
+type networkReqState struct { + method string + url string + headers json.RawMessage + postData string + resourceType string + initiator json.RawMessage + status int + statusText string + resHeaders json.RawMessage + mimeType string +} + +// cdpConsoleArg is a single Runtime.consoleAPICalled argument. +type cdpConsoleArg struct { + Type string `json:"type"` + Value string `json:"value"` +} + +// cdpConsoleParams is the shape of Runtime.consoleAPICalled params. +type cdpConsoleParams struct { + Type string `json:"type"` + Args []cdpConsoleArg `json:"args"` + StackTrace json.RawMessage `json:"stackTrace"` +} + +// cdpExceptionDetails is the shape of Runtime.exceptionThrown params. +type cdpExceptionDetails struct { + ExceptionDetails struct { + Text string `json:"text"` + LineNumber int `json:"lineNumber"` + ColumnNumber int `json:"columnNumber"` + URL string `json:"url"` + StackTrace json.RawMessage `json:"stackTrace"` + } `json:"exceptionDetails"` +} + +// cdpTargetInfo is the shared TargetInfo shape used by Target events. +type cdpTargetInfo struct { + TargetID string `json:"targetId"` + Type string `json:"type"` + URL string `json:"url"` +} + +// cdpNetworkRequestParams is the shape of Network.requestWillBeSent params. +type cdpNetworkRequestParams struct { + RequestID string `json:"requestId"` + ResourceType string `json:"resourceType"` + Request struct { + Method string `json:"method"` + URL string `json:"url"` + Headers json.RawMessage `json:"headers"` + PostData string `json:"postData"` + } `json:"request"` + Initiator json.RawMessage `json:"initiator"` +} + +// cdpResponseReceivedParams is the shape of Network.responseReceived params. 
+type cdpResponseReceivedParams struct { + RequestID string `json:"requestId"` + Response struct { + Status int `json:"status"` + StatusText string `json:"statusText"` + Headers json.RawMessage `json:"headers"` + MimeType string `json:"mimeType"` + } `json:"response"` +} + +// cdpAttachedToTargetParams is the shape of Target.attachedToTarget params. +type cdpAttachedToTargetParams struct { + SessionID string `json:"sessionId"` + TargetInfo cdpTargetInfo `json:"targetInfo"` +} + +// cdpTargetCreatedParams is the shape of Target.targetCreated params. +type cdpTargetCreatedParams struct { + TargetInfo cdpTargetInfo `json:"targetInfo"` +} From 16edb0d21c81727767b6665d3a475e7df90c625e Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:25:29 +0000 Subject: [PATCH 23/27] feat: implement CDP monitor with websocket capture, event handlers, and reconnect --- server/lib/cdpmonitor/computed.go | 180 ++++++++++++++ server/lib/cdpmonitor/domains.go | 87 +++++++ server/lib/cdpmonitor/handlers.go | 362 ++++++++++++++++++++++++++++ server/lib/cdpmonitor/monitor.go | 307 ++++++++++++++++++++++- server/lib/cdpmonitor/screenshot.go | 87 +++++++ 5 files changed, 1017 insertions(+), 6 deletions(-) create mode 100644 server/lib/cdpmonitor/computed.go create mode 100644 server/lib/cdpmonitor/domains.go create mode 100644 server/lib/cdpmonitor/handlers.go create mode 100644 server/lib/cdpmonitor/screenshot.go diff --git a/server/lib/cdpmonitor/computed.go b/server/lib/cdpmonitor/computed.go new file mode 100644 index 00000000..c753730f --- /dev/null +++ b/server/lib/cdpmonitor/computed.go @@ -0,0 +1,180 @@ +package cdpmonitor + +import ( + "encoding/json" + "sync" + "time" + + "github.com/onkernel/kernel-images/server/lib/events" +) +// computedState holds the mutable state for all computed meta-events. +type computedState struct { + mu sync.Mutex + publish PublishFunc + + // network_idle: 500 ms debounce after all pending requests finish. 
+ netPending int + netTimer *time.Timer + netFired bool + + // layout_settled: 1s after page_load with no intervening layout shifts. + layoutTimer *time.Timer + layoutFired bool + pageLoadSeen bool + + // navigation_settled: fires once dom_content_loaded, network_idle, and + // layout_settled have all fired after the same Page.frameNavigated. + navDOMLoaded bool + navNetIdle bool + navLayoutSettled bool + navFired bool +} + +// newComputedState creates a fresh computedState backed by the given publish func. +func newComputedState(publish PublishFunc) *computedState { + return &computedState{publish: publish} +} + +func stopTimer(t *time.Timer) { + if t == nil { + return + } + if !t.Stop() { + select { + case <-t.C: + default: + } + } +} + +// resetOnNavigation resets all state machines. Called on Page.frameNavigated +func (s *computedState) resetOnNavigation() { + s.mu.Lock() + defer s.mu.Unlock() + + stopTimer(s.netTimer) + s.netTimer = nil + s.netPending = 0 + s.netFired = false + + stopTimer(s.layoutTimer) + s.layoutTimer = nil + s.layoutFired = false + s.pageLoadSeen = false + + s.navDOMLoaded = false + s.navNetIdle = false + s.navLayoutSettled = false + s.navFired = false +} + +func (s *computedState) onRequest() { + s.mu.Lock() + defer s.mu.Unlock() + s.netPending++ + // A new request invalidates any pending network_idle timer + stopTimer(s.netTimer) + s.netTimer = nil +} + +// onLoadingFinished is called on Network.loadingFinished or Network.loadingFailed. +func (s *computedState) onLoadingFinished() { + s.mu.Lock() + defer s.mu.Unlock() + + s.netPending-- + if s.netPending < 0 { + s.netPending = 0 + } + if s.netPending > 0 || s.netFired { + return + } + // All requests done and not yet fired — start 500 ms debounce timer. 
+ stopTimer(s.netTimer) + s.netTimer = time.AfterFunc(500*time.Millisecond, func() { + s.mu.Lock() + defer s.mu.Unlock() + if s.netFired || s.netPending > 0 { + return + } + s.netFired = true + s.navNetIdle = true + s.publish(events.Event{ + Ts: time.Now().UnixMilli(), + Type: "network_idle", + Category: events.CategoryNetwork, + Source: events.Source{Kind: events.KindCDP}, + DetailLevel: events.DetailStandard, + Data: json.RawMessage(`{}`), + }) + s.checkNavigationSettled() + }) +} + +// onPageLoad is called on Page.loadEventFired. +func (s *computedState) onPageLoad() { + s.mu.Lock() + defer s.mu.Unlock() + s.pageLoadSeen = true + if s.layoutFired { + return + } + // Start the 1 s layout_settled timer. + stopTimer(s.layoutTimer) + s.layoutTimer = time.AfterFunc(1*time.Second, s.emitLayoutSettled) +} + +// onLayoutShift is called when a layout_shift sentinel arrives from injected JS. +func (s *computedState) onLayoutShift() { + s.mu.Lock() + defer s.mu.Unlock() + if s.layoutFired || !s.pageLoadSeen { + return + } + // Reset the timer to 1 s from now. + stopTimer(s.layoutTimer) + s.layoutTimer = time.AfterFunc(1*time.Second, s.emitLayoutSettled) +} + +// emitLayoutSettled is called from the layout timer's AfterFunc goroutine +func (s *computedState) emitLayoutSettled() { + s.mu.Lock() + defer s.mu.Unlock() + if s.layoutFired || !s.pageLoadSeen { + return + } + s.layoutFired = true + s.navLayoutSettled = true + s.publish(events.Event{ + Ts: time.Now().UnixMilli(), + Type: "layout_settled", + Category: events.CategoryPage, + Source: events.Source{Kind: events.KindCDP}, + DetailLevel: events.DetailStandard, + Data: json.RawMessage(`{}`), + }) + s.checkNavigationSettled() +} + +// onDOMContentLoaded is called on Page.domContentEventFired. 
+func (s *computedState) onDOMContentLoaded() { + s.mu.Lock() + defer s.mu.Unlock() + s.navDOMLoaded = true + s.checkNavigationSettled() +} + +// checkNavigationSettled emits navigation_settled if all three flags are set +func (s *computedState) checkNavigationSettled() { + if s.navDOMLoaded && s.navNetIdle && s.navLayoutSettled && !s.navFired { + s.navFired = true + s.publish(events.Event{ + Ts: time.Now().UnixMilli(), + Type: "navigation_settled", + Category: events.CategoryPage, + Source: events.Source{Kind: events.KindCDP}, + DetailLevel: events.DetailStandard, + Data: json.RawMessage(`{}`), + }) + } +} diff --git a/server/lib/cdpmonitor/domains.go b/server/lib/cdpmonitor/domains.go new file mode 100644 index 00000000..f32932c6 --- /dev/null +++ b/server/lib/cdpmonitor/domains.go @@ -0,0 +1,87 @@ +package cdpmonitor + +import "context" + +// bindingName is the JS function exposed via Runtime.addBinding. +// Page JS calls this to fire Runtime.bindingCalled CDP events. +const bindingName = "__kernelEvent" + +// enableDomains enables CDP domains, registers the event binding, and starts +// layout-shift observation. Failures are non-fatal. +func (m *Monitor) enableDomains(ctx context.Context, sessionID string) { + for _, method := range []string{ + "Runtime.enable", + "Network.enable", + "Page.enable", + "DOM.enable", + } { + _, _ = m.send(ctx, method, nil, sessionID) + } + + _, _ = m.send(ctx, "Runtime.addBinding", map[string]any{ + "name": bindingName, + }, sessionID) + + _, _ = m.send(ctx, "PerformanceTimeline.enable", map[string]any{ + "eventTypes": []string{"layout-shift"}, + }, sessionID) +} + +// injectedJS tracks clicks, keys, and scrolls via the __kernelEvent binding. +// Layout shifts are handled natively by PerformanceTimeline.enable. +const injectedJS = `(function() { + var send = window.__kernelEvent; + if (!send) return; + + function sel(el) { + return el.id ? '#' + el.id : (el.className ? '.' 
+ String(el.className).split(' ')[0] : ''); + } + + document.addEventListener('click', function(e) { + var t = e.target || {}; + send(JSON.stringify({ + type: 'interaction_click', + x: e.clientX, y: e.clientY, + selector: sel(t), tag: t.tagName || '', + text: (t.innerText || '').slice(0, 100) + })); + }, true); + + document.addEventListener('keydown', function(e) { + var t = e.target || {}; + send(JSON.stringify({ + type: 'interaction_key', + key: e.key, + selector: sel(t), tag: t.tagName || '' + })); + }, true); + + var scrollTimer = null; + var scrollStart = {x: window.scrollX, y: window.scrollY}; + document.addEventListener('scroll', function(e) { + var fromX = scrollStart.x, fromY = scrollStart.y; + var target = e.target; + var s = target === document ? 'document' : sel(target); + if (scrollTimer) clearTimeout(scrollTimer); + scrollTimer = setTimeout(function() { + var toX = window.scrollX, toY = window.scrollY; + if (Math.abs(toX - fromX) > 5 || Math.abs(toY - fromY) > 5) { + send(JSON.stringify({ + type: 'scroll_settled', + from_x: fromX, from_y: fromY, + to_x: toX, to_y: toY, + target_selector: s + })); + } + scrollStart = {x: toX, y: toY}; + }, 300); + }, true); +})();` + +// injectScript registers the interaction tracking JS for the given session. +func (m *Monitor) injectScript(ctx context.Context, sessionID string) error { + _, err := m.send(ctx, "Page.addScriptToEvaluateOnNewDocument", map[string]any{ + "source": injectedJS, + }, sessionID) + return err +} diff --git a/server/lib/cdpmonitor/handlers.go b/server/lib/cdpmonitor/handlers.go new file mode 100644 index 00000000..3501f50a --- /dev/null +++ b/server/lib/cdpmonitor/handlers.go @@ -0,0 +1,362 @@ +package cdpmonitor + +import ( + "encoding/json" + "time" + "unicode/utf8" + + "github.com/onkernel/kernel-images/server/lib/events" +) + +// publishEvent stamps common fields and publishes an Event. 
+func (m *Monitor) publishEvent(eventType string, source events.Source, sourceEvent string, data json.RawMessage, sessionID string) { + src := source + src.Event = sourceEvent + if sessionID != "" { + if src.Metadata == nil { + src.Metadata = make(map[string]string) + } + src.Metadata["cdp_session_id"] = sessionID + } + m.publish(events.Event{ + Ts: time.Now().UnixMilli(), + Type: eventType, + Category: events.CategoryFor(eventType), + Source: src, + DetailLevel: events.DetailStandard, + Data: data, + }) +} + +// dispatchEvent routes a CDP event to its handler. +func (m *Monitor) dispatchEvent(msg cdpMessage) { + switch msg.Method { + case "Runtime.consoleAPICalled": + m.handleConsole(msg.Params, msg.SessionID) + case "Runtime.exceptionThrown": + m.handleExceptionThrown(msg.Params, msg.SessionID) + case "Runtime.bindingCalled": + m.handleBindingCalled(msg.Params, msg.SessionID) + case "Network.requestWillBeSent": + m.handleNetworkRequest(msg.Params, msg.SessionID) + case "Network.responseReceived": + m.handleResponseReceived(msg.Params, msg.SessionID) + case "Network.loadingFinished": + m.handleLoadingFinished(msg.Params, msg.SessionID) + case "Network.loadingFailed": + m.handleLoadingFailed(msg.Params, msg.SessionID) + case "Page.frameNavigated": + m.handleFrameNavigated(msg.Params, msg.SessionID) + case "Page.domContentEventFired": + m.handleDOMContentLoaded(msg.Params, msg.SessionID) + case "Page.loadEventFired": + m.handleLoadEventFired(msg.Params, msg.SessionID) + case "DOM.documentUpdated": + m.handleDOMUpdated(msg.Params, msg.SessionID) + case "PerformanceTimeline.timelineEventAdded": + m.handleTimelineEvent(msg.Params, msg.SessionID) + case "Target.attachedToTarget": + m.handleAttachedToTarget(msg) + case "Target.targetCreated": + m.handleTargetCreated(msg.Params, msg.SessionID) + case "Target.targetDestroyed": + m.handleTargetDestroyed(msg.Params, msg.SessionID) + } +} + +func (m *Monitor) handleConsole(params json.RawMessage, sessionID string) { + var p 
cdpConsoleParams + if err := json.Unmarshal(params, &p); err != nil { + return + } + + text := "" + if len(p.Args) > 0 { + text = p.Args[0].Value + } + argValues := make([]string, 0, len(p.Args)) + for _, a := range p.Args { + argValues = append(argValues, a.Value) + } + data, _ := json.Marshal(map[string]any{ + "level": p.Type, + "text": text, + "args": argValues, + "stack_trace": p.StackTrace, + }) + m.publishEvent("console_log", events.Source{Kind: events.KindCDP}, "Runtime.consoleAPICalled", data, sessionID) +} + +func (m *Monitor) handleExceptionThrown(params json.RawMessage, sessionID string) { + var p cdpExceptionDetails + if err := json.Unmarshal(params, &p); err != nil { + return + } + data, _ := json.Marshal(map[string]any{ + "text": p.ExceptionDetails.Text, + "line": p.ExceptionDetails.LineNumber, + "column": p.ExceptionDetails.ColumnNumber, + "url": p.ExceptionDetails.URL, + "stack_trace": p.ExceptionDetails.StackTrace, + }) + m.publishEvent("console_error", events.Source{Kind: events.KindCDP}, "Runtime.exceptionThrown", data, sessionID) + go m.maybeScreenshot(m.lifecycleCtx) +} + +// handleBindingCalled processes __kernelEvent binding calls. +func (m *Monitor) handleBindingCalled(params json.RawMessage, sessionID string) { + var p struct { + Name string `json:"name"` + Payload string `json:"payload"` + } + if err := json.Unmarshal(params, &p); err != nil || p.Name != bindingName { + return + } + payload := json.RawMessage(p.Payload) + if !json.Valid(payload) { + return + } + var header struct { + Type string `json:"type"` + } + if err := json.Unmarshal(payload, &header); err != nil { + return + } + switch header.Type { + case "interaction_click", "interaction_key", "scroll_settled": + m.publishEvent(header.Type, events.Source{Kind: events.KindCDP}, "Runtime.bindingCalled", payload, sessionID) + } +} + +// handleTimelineEvent processes layout-shift events from PerformanceTimeline. 
+func (m *Monitor) handleTimelineEvent(params json.RawMessage, sessionID string) { + var p struct { + Event struct { + Type string `json:"type"` + LayoutShift json.RawMessage `json:"layoutShiftDetails,omitempty"` + } `json:"event"` + } + if err := json.Unmarshal(params, &p); err != nil || p.Event.Type != "layout-shift" { + return + } + m.publishEvent("layout_shift", events.Source{Kind: events.KindCDP}, "PerformanceTimeline.timelineEventAdded", params, sessionID) + m.computed.onLayoutShift() +} + +func (m *Monitor) handleNetworkRequest(params json.RawMessage, sessionID string) { + var p cdpNetworkRequestParams + if err := json.Unmarshal(params, &p); err != nil { + return + } + m.pendReqMu.Lock() + m.pendingRequests[p.RequestID] = networkReqState{ + method: p.Request.Method, + url: p.Request.URL, + headers: p.Request.Headers, + postData: p.Request.PostData, + resourceType: p.ResourceType, + initiator: p.Initiator, + } + m.pendReqMu.Unlock() + data, _ := json.Marshal(map[string]any{ + "method": p.Request.Method, + "url": p.Request.URL, + "headers": p.Request.Headers, + "post_data": p.Request.PostData, + "resource_type": p.ResourceType, + "initiator": p.Initiator, + }) + m.publishEvent("network_request", events.Source{Kind: events.KindCDP}, "Network.requestWillBeSent", data, sessionID) + m.computed.onRequest() +} + +func (m *Monitor) handleResponseReceived(params json.RawMessage, sessionID string) { + var p cdpResponseReceivedParams + if err := json.Unmarshal(params, &p); err != nil { + return + } + m.pendReqMu.Lock() + if state, ok := m.pendingRequests[p.RequestID]; ok { + state.status = p.Response.Status + state.statusText = p.Response.StatusText + state.resHeaders = p.Response.Headers + state.mimeType = p.Response.MimeType + m.pendingRequests[p.RequestID] = state + } + m.pendReqMu.Unlock() +} + +func (m *Monitor) handleLoadingFinished(params json.RawMessage, sessionID string) { + var p struct { + RequestID string `json:"requestId"` + } + if err := 
json.Unmarshal(params, &p); err != nil { + return + } + m.pendReqMu.Lock() + state, ok := m.pendingRequests[p.RequestID] + if ok { + delete(m.pendingRequests, p.RequestID) + } + m.pendReqMu.Unlock() + if !ok { + return + } + // Fetch response body async to avoid blocking readLoop. + go func() { + ctx := m.lifecycleCtx + body := "" + result, err := m.send(ctx, "Network.getResponseBody", map[string]any{ + "requestId": p.RequestID, + }, sessionID) + if err == nil { + var resp struct { + Body string `json:"body"` + Base64Encoded bool `json:"base64Encoded"` + } + if json.Unmarshal(result, &resp) == nil { + body = truncateBody(resp.Body) + } + } + data, _ := json.Marshal(map[string]any{ + "method": state.method, + "url": state.url, + "status": state.status, + "status_text": state.statusText, + "headers": state.resHeaders, + "mime_type": state.mimeType, + "body": body, + }) + m.publishEvent("network_response", events.Source{Kind: events.KindCDP}, "Network.loadingFinished", data, sessionID) + m.computed.onLoadingFinished() + }() +} + +func (m *Monitor) handleLoadingFailed(params json.RawMessage, sessionID string) { + var p struct { + RequestID string `json:"requestId"` + ErrorText string `json:"errorText"` + Canceled bool `json:"canceled"` + } + if err := json.Unmarshal(params, &p); err != nil { + return + } + m.pendReqMu.Lock() + state, ok := m.pendingRequests[p.RequestID] + if ok { + delete(m.pendingRequests, p.RequestID) + } + m.pendReqMu.Unlock() + + ev := map[string]any{ + "error_text": p.ErrorText, + "canceled": p.Canceled, + } + if ok { + ev["url"] = state.url + } + data, _ := json.Marshal(ev) + m.publishEvent("network_loading_failed", events.Source{Kind: events.KindCDP}, "Network.loadingFailed", data, sessionID) + m.computed.onLoadingFinished() +} + +// truncateBody caps body at ~900KB on a valid UTF-8 boundary. +func truncateBody(body string) string { + const maxBody = 900 * 1024 + if len(body) <= maxBody { + return body + } + // Back up to a valid rune boundary. 
+ truncated := body[:maxBody] + for !utf8.ValidString(truncated) { + truncated = truncated[:len(truncated)-1] + } + return truncated +} + +func (m *Monitor) handleFrameNavigated(params json.RawMessage, sessionID string) { + var p struct { + Frame struct { + ID string `json:"id"` + ParentID string `json:"parentId"` + URL string `json:"url"` + } `json:"frame"` + } + if err := json.Unmarshal(params, &p); err != nil { + return + } + data, _ := json.Marshal(map[string]any{ + "url": p.Frame.URL, + "frame_id": p.Frame.ID, + "parent_frame_id": p.Frame.ParentID, + }) + m.publishEvent("navigation", events.Source{Kind: events.KindCDP}, "Page.frameNavigated", data, sessionID) + + m.pendReqMu.Lock() + clear(m.pendingRequests) + m.pendReqMu.Unlock() + + m.computed.resetOnNavigation() +} + +func (m *Monitor) handleDOMContentLoaded(params json.RawMessage, sessionID string) { + m.publishEvent("dom_content_loaded", events.Source{Kind: events.KindCDP}, "Page.domContentEventFired", params, sessionID) + m.computed.onDOMContentLoaded() +} + +func (m *Monitor) handleLoadEventFired(params json.RawMessage, sessionID string) { + m.publishEvent("page_load", events.Source{Kind: events.KindCDP}, "Page.loadEventFired", params, sessionID) + m.computed.onPageLoad() + go m.maybeScreenshot(m.lifecycleCtx) +} + +func (m *Monitor) handleDOMUpdated(params json.RawMessage, sessionID string) { + m.publishEvent("dom_updated", events.Source{Kind: events.KindCDP}, "DOM.documentUpdated", params, sessionID) +} + +// handleAttachedToTarget stores the session and enables domains + injects script. +func (m *Monitor) handleAttachedToTarget(msg cdpMessage) { + var params cdpAttachedToTargetParams + if err := json.Unmarshal(msg.Params, ¶ms); err != nil { + return + } + m.sessionsMu.Lock() + m.sessions[params.SessionID] = targetInfo{ + targetID: params.TargetInfo.TargetID, + url: params.TargetInfo.URL, + targetType: params.TargetInfo.Type, + } + m.sessionsMu.Unlock() + + // Async to avoid blocking readLoop. 
+ go func() { + m.enableDomains(m.lifecycleCtx, params.SessionID) + _ = m.injectScript(m.lifecycleCtx, params.SessionID) + }() +} + +func (m *Monitor) handleTargetCreated(params json.RawMessage, sessionID string) { + var p cdpTargetCreatedParams + if err := json.Unmarshal(params, &p); err != nil { + return + } + data, _ := json.Marshal(map[string]any{ + "target_id": p.TargetInfo.TargetID, + "target_type": p.TargetInfo.Type, + "url": p.TargetInfo.URL, + }) + m.publishEvent("target_created", events.Source{Kind: events.KindCDP}, "Target.targetCreated", data, sessionID) +} + +func (m *Monitor) handleTargetDestroyed(params json.RawMessage, sessionID string) { + var p struct { + TargetID string `json:"targetId"` + } + if err := json.Unmarshal(params, &p); err != nil { + return + } + data, _ := json.Marshal(map[string]any{ + "target_id": p.TargetID, + }) + m.publishEvent("target_destroyed", events.Source{Kind: events.KindCDP}, "Target.targetDestroyed", data, sessionID) +} diff --git a/server/lib/cdpmonitor/monitor.go b/server/lib/cdpmonitor/monitor.go index 737f9650..886e5946 100644 --- a/server/lib/cdpmonitor/monitor.go +++ b/server/lib/cdpmonitor/monitor.go @@ -2,8 +2,13 @@ package cdpmonitor import ( "context" + "encoding/json" + "fmt" + "sync" "sync/atomic" + "time" + "github.com/coder/websocket" "github.com/onkernel/kernel-images/server/lib/events" ) @@ -17,14 +22,49 @@ type UpstreamProvider interface { type PublishFunc func(ev events.Event) // Monitor manages a CDP WebSocket connection with auto-attach session fan-out. -// Single-use per capture session: call Start to begin, Stop to tear down. 
type Monitor struct { + upstreamMgr UpstreamProvider + publish PublishFunc + displayNum int + + conn *websocket.Conn + connMu sync.Mutex + + nextID atomic.Int64 + pendMu sync.Mutex + pending map[int64]chan cdpMessage + + sessionsMu sync.RWMutex + sessions map[string]targetInfo // sessionID → targetInfo + + pendReqMu sync.Mutex + pendingRequests map[string]networkReqState // requestId → networkReqState + + computed *computedState + + lastScreenshotAt atomic.Int64 // unix millis of last capture + screenshotFn func(ctx context.Context, displayNum int) ([]byte, error) // nil → real ffmpeg + + lifecycleCtx context.Context // cancelled on Stop() + cancel context.CancelFunc + done chan struct{} + running atomic.Bool } // New creates a Monitor. displayNum is the X display for ffmpeg screenshots. -func New(_ UpstreamProvider, _ PublishFunc, _ int) *Monitor { - return &Monitor{} +func New(upstreamMgr UpstreamProvider, publish PublishFunc, displayNum int) *Monitor { + m := &Monitor{ + upstreamMgr: upstreamMgr, + publish: publish, + displayNum: displayNum, + sessions: make(map[string]targetInfo), + pending: make(map[int64]chan cdpMessage), + pendingRequests: make(map[string]networkReqState), + } + m.computed = newComputedState(publish) + m.lifecycleCtx = context.Background() + return m } // IsRunning reports whether the monitor is actively capturing. @@ -33,9 +73,264 @@ func (m *Monitor) IsRunning() bool { } // Start begins CDP capture. Restarts if already running. 
-func (m *Monitor) Start(_ context.Context) error { +func (m *Monitor) Start(parentCtx context.Context) error { + if m.running.Load() { + m.Stop() + } + + devtoolsURL := m.upstreamMgr.Current() + if devtoolsURL == "" { + return fmt.Errorf("cdpmonitor: no DevTools URL available") + } + + conn, _, err := websocket.Dial(parentCtx, devtoolsURL, nil) + if err != nil { + return fmt.Errorf("cdpmonitor: dial %s: %w", devtoolsURL, err) + } + conn.SetReadLimit(8 * 1024 * 1024) + + m.connMu.Lock() + m.conn = conn + m.connMu.Unlock() + + ctx, cancel := context.WithCancel(parentCtx) + m.lifecycleCtx = ctx + m.cancel = cancel + m.done = make(chan struct{}) + + m.running.Store(true) + + go m.readLoop(ctx) + go m.subscribeToUpstream(ctx) + go m.initSession(ctx) // must run after readLoop starts + return nil } -// Stop tears down the monitor. Safe to call multiple times. -func (m *Monitor) Stop() {} +// Stop cancels the context and waits for goroutines to exit. +func (m *Monitor) Stop() { + if !m.running.Swap(false) { + return + } + if m.cancel != nil { + m.cancel() + } + if m.done != nil { + <-m.done + } + m.connMu.Lock() + if m.conn != nil { + _ = m.conn.Close(websocket.StatusNormalClosure, "stopped") + m.conn = nil + } + m.connMu.Unlock() + + m.sessionsMu.Lock() + m.sessions = make(map[string]targetInfo) + m.sessionsMu.Unlock() + + m.pendReqMu.Lock() + m.pendingRequests = make(map[string]networkReqState) + m.pendReqMu.Unlock() + + m.computed.resetOnNavigation() +} + +// readLoop reads CDP messages, routing responses to pending callers and +// dispatching events. Exits on connection close; respawned on reconnect. 
func (m *Monitor) readLoop(ctx context.Context) {
	// Closing m.done signals Stop()/restartReadLoop() that this loop exited.
	defer close(m.done)

	for {
		// Snapshot the connection under the mutex; the reconnect path and
		// Stop() both swap or nil it out concurrently.
		m.connMu.Lock()
		conn := m.conn
		m.connMu.Unlock()
		if conn == nil {
			return
		}

		_, b, err := conn.Read(ctx)
		if err != nil {
			// Connection closed or ctx cancelled; reconnection (if any) is
			// driven separately by subscribeToUpstream.
			return
		}

		var msg cdpMessage
		if err := json.Unmarshal(b, &msg); err != nil {
			// Malformed frame: skip it rather than kill the whole loop.
			continue
		}

		// A non-zero ID marks a command response (send() allocates IDs from
		// nextID starting at 1, so 0 reliably means a CDP event). Route it
		// to the caller blocked in send().
		if msg.ID != 0 {
			m.pendMu.Lock()
			ch, ok := m.pending[msg.ID]
			m.pendMu.Unlock()
			if ok {
				// Non-blocking send: ch is buffered (size 1) and the waiter
				// may already have timed out; never stall the read loop.
				select {
				case ch <- msg:
				default:
				}
			}
			continue
		}

		m.dispatchEvent(msg)
	}
}

// send issues a CDP command and blocks until the response arrives.
// It marshals params (if any), registers a one-shot response channel keyed by
// the command ID, writes the frame, and waits for readLoop to deliver the
// response. Returns the CDP error from the response if present, or ctx.Err()
// on cancellation. The deferred delete ensures the pending entry is removed
// on every exit path, including timeout.
func (m *Monitor) send(ctx context.Context, method string, params any, sessionID string) (json.RawMessage, error) {
	id := m.nextID.Add(1)

	var rawParams json.RawMessage
	if params != nil {
		b, err := json.Marshal(params)
		if err != nil {
			return nil, fmt.Errorf("marshal params: %w", err)
		}
		rawParams = b
	}

	req := cdpMessage{ID: id, Method: method, Params: rawParams, SessionID: sessionID}
	reqBytes, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	// Buffered so readLoop's non-blocking delivery always succeeds once.
	ch := make(chan cdpMessage, 1)
	m.pendMu.Lock()
	m.pending[id] = ch
	m.pendMu.Unlock()
	defer func() {
		m.pendMu.Lock()
		delete(m.pending, id)
		m.pendMu.Unlock()
	}()

	m.connMu.Lock()
	conn := m.conn
	m.connMu.Unlock()
	if conn == nil {
		return nil, fmt.Errorf("cdpmonitor: connection not open")
	}

	if err := conn.Write(ctx, websocket.MessageText, reqBytes); err != nil {
		return nil, fmt.Errorf("write: %w", err)
	}

	select {
	case resp := <-ch:
		if resp.Error != nil {
			return nil, resp.Error
		}
		return resp.Result, nil
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}

// initSession enables CDP domains and injects the interaction-tracking script
// on a fresh connection (called async).
func (m *Monitor) initSession(ctx context.Context) {
	// Flat auto-attach delivers every target's events over this single
	// connection, tagged with a sessionId, instead of separate sockets.
	_, _ = m.send(ctx, "Target.setAutoAttach", map[string]any{
		"autoAttach":             true,
		"waitForDebuggerOnStart": false,
		"flatten":                true,
	}, "")
	// Empty sessionID addresses the browser-level session. Errors are
	// deliberately ignored: capture is best-effort and must not abort setup.
	m.enableDomains(ctx, "")
	_ = m.injectScript(ctx, "")
}

// restartReadLoop waits for the old readLoop to exit, then spawns a new one.
//
// NOTE(review): m.done is reassigned here with no synchronization while
// Stop() reads the same field — a data race if Stop() overlaps a reconnect
// (run the suite with -race to confirm). Consider guarding m.done with a
// mutex or passing a per-loop done channel into readLoop instead.
func (m *Monitor) restartReadLoop(ctx context.Context) {
	<-m.done
	m.done = make(chan struct{})
	go m.readLoop(ctx)
}

// subscribeToUpstream reconnects with backoff on Chrome restarts, emitting
// monitor_disconnected / monitor_reconnected events.
func (m *Monitor) subscribeToUpstream(ctx context.Context) {
	ch, cancel := m.upstreamMgr.Subscribe()
	defer cancel()

	// Backoff schedule; the last entry is reused for attempts past the fourth.
	backoffs := []time.Duration{
		250 * time.Millisecond,
		500 * time.Millisecond,
		1 * time.Second,
		2 * time.Second,
	}

	for {
		select {
		case <-ctx.Done():
			return
		case newURL, ok := <-ch:
			if !ok {
				// Upstream subscription closed; nothing more to react to.
				return
			}
			m.publish(events.Event{
				Ts:          time.Now().UnixMilli(),
				Type:        "monitor_disconnected",
				Category:    events.CategorySystem,
				Source:      events.Source{Kind: events.KindLocalProcess},
				DetailLevel: events.DetailMinimal,
				Data:        json.RawMessage(`{"reason":"chrome_restarted"}`),
			})

			startReconnect := time.Now()

			// Drop the connection to the old Chrome instance; this also makes
			// the current readLoop exit on its next Read.
			m.connMu.Lock()
			if m.conn != nil {
				_ = m.conn.Close(websocket.StatusNormalClosure, "reconnecting")
				m.conn = nil
			}
			m.connMu.Unlock()

			var reconnErr error
			for attempt := range 10 {
				if ctx.Err() != nil {
					return
				}

				idx := min(attempt, len(backoffs)-1)
				select {
				case <-ctx.Done():
					return
				case <-time.After(backoffs[idx]):
				}

				conn, _, err := websocket.Dial(ctx, newURL, nil)
				if err != nil {
					reconnErr = err
					continue
				}
				// Match the read limit applied to the initial connection.
				conn.SetReadLimit(8 * 1024 * 1024)

				m.connMu.Lock()
				m.conn = conn
				m.connMu.Unlock()

				reconnErr = nil
				break
			}

			if reconnErr != nil {
				// NOTE(review): after 10 failed attempts the monitor gives up
				// silently while still reporting IsRunning() == true, with no
				// connection and no published event — consider emitting a
				// terminal event here so consumers can tell capture is dead.
				return
			}

			m.restartReadLoop(ctx)
			go m.initSession(ctx)

			m.publish(events.Event{
				Ts: time.Now().UnixMilli(),
Type: "monitor_reconnected", + Category: events.CategorySystem, + Source: events.Source{Kind: events.KindLocalProcess}, + DetailLevel: events.DetailMinimal, + Data: json.RawMessage(fmt.Sprintf( + `{"reconnect_duration_ms":%d}`, + time.Since(startReconnect).Milliseconds(), + )), + }) + } + } +} diff --git a/server/lib/cdpmonitor/screenshot.go b/server/lib/cdpmonitor/screenshot.go new file mode 100644 index 00000000..54b7b985 --- /dev/null +++ b/server/lib/cdpmonitor/screenshot.go @@ -0,0 +1,87 @@ +package cdpmonitor + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os/exec" + "time" + + "github.com/onkernel/kernel-images/server/lib/events" +) + +// maybeScreenshot triggers a screenshot if the rate-limit window has elapsed. +// It uses an atomic CAS on lastScreenshotAt to ensure only one screenshot runs +// at a time. +func (m *Monitor) maybeScreenshot(ctx context.Context) { + now := time.Now().UnixMilli() + last := m.lastScreenshotAt.Load() + if now-last < 2000 { + return + } + if !m.lastScreenshotAt.CompareAndSwap(last, now) { + return + } + go m.captureScreenshot(ctx) +} + +// captureScreenshot takes a screenshot via ffmpeg x11grab (or the screenshotFn +// seam in tests), optionally downscales it, and publishes a screenshot event. +func (m *Monitor) captureScreenshot(ctx context.Context) { + var pngBytes []byte + var err error + + if m.screenshotFn != nil { + pngBytes, err = m.screenshotFn(ctx, m.displayNum) + } else { + pngBytes, err = captureViaFFmpeg(ctx, m.displayNum, 1) + } + if err != nil { + return + } + + // Downscale if base64 output would exceed 950KB (~729KB raw). 
+ const rawThreshold = 729 * 1024 + for scale := 2; len(pngBytes) > rawThreshold && scale <= 16 && m.screenshotFn == nil; scale *= 2 { + pngBytes, err = captureViaFFmpeg(ctx, m.displayNum, scale) + if err != nil { + return + } + } + + encoded := base64.StdEncoding.EncodeToString(pngBytes) + data := json.RawMessage(fmt.Sprintf(`{"png":%q}`, encoded)) + + m.publish(events.Event{ + Ts: time.Now().UnixMilli(), + Type: "screenshot", + Category: events.CategorySystem, + Source: events.Source{Kind: events.KindLocalProcess}, + DetailLevel: events.DetailStandard, + Data: data, + }) +} + +// captureViaFFmpeg runs ffmpeg x11grab to capture a PNG screenshot. +// If divisor > 1, a scale filter is applied to reduce the output size. +func captureViaFFmpeg(ctx context.Context, displayNum, divisor int) ([]byte, error) { + args := []string{ + "-f", "x11grab", + "-i", fmt.Sprintf(":%d", displayNum), + "-vframes", "1", + } + if divisor > 1 { + args = append(args, "-vf", fmt.Sprintf("scale=iw/%d:ih/%d", divisor, divisor)) + } + args = append(args, "-f", "image2", "pipe:1") + + var out bytes.Buffer + cmd := exec.CommandContext(ctx, "ffmpeg", args...) 
+ cmd.Stdout = &out + if err := cmd.Run(); err != nil { + return nil, err + } + return out.Bytes(), nil +} From 8fe30d0c2ba59eda631c3a033b90a1bd77ec8c9f Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Wed, 1 Apr 2026 13:25:34 +0000 Subject: [PATCH 24/27] test: add CDP monitor test suite with in-process websocket mock --- server/lib/cdpmonitor/monitor_test.go | 1142 +++++++++++++++++++++++++ 1 file changed, 1142 insertions(+) create mode 100644 server/lib/cdpmonitor/monitor_test.go diff --git a/server/lib/cdpmonitor/monitor_test.go b/server/lib/cdpmonitor/monitor_test.go new file mode 100644 index 00000000..d16104f1 --- /dev/null +++ b/server/lib/cdpmonitor/monitor_test.go @@ -0,0 +1,1142 @@ +package cdpmonitor + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/coder/websocket" + "github.com/coder/websocket/wsjson" + "github.com/onkernel/kernel-images/server/lib/events" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeCDPServer is a minimal WebSocket server that accepts connections and +// lets the test drive scripted message sequences. 
+type fakeCDPServer struct { + srv *httptest.Server + conn *websocket.Conn + connMu sync.Mutex + msgCh chan []byte // inbound messages from Monitor +} + +func newFakeCDPServer(t *testing.T) *fakeCDPServer { + t.Helper() + f := &fakeCDPServer{ + msgCh: make(chan []byte, 128), + } + f.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + c, err := websocket.Accept(w, r, &websocket.AcceptOptions{InsecureSkipVerify: true}) + if err != nil { + return + } + f.connMu.Lock() + f.conn = c + f.connMu.Unlock() + // drain messages from Monitor into msgCh until connection closes + go func() { + for { + _, b, err := c.Read(context.Background()) + if err != nil { + return + } + f.msgCh <- b + } + }() + })) + return f +} + +// wsURL returns a ws:// URL pointing at the fake server. +func (f *fakeCDPServer) wsURL() string { + return "ws" + strings.TrimPrefix(f.srv.URL, "http") +} + +// sendToMonitor pushes a raw JSON message to the Monitor's readLoop. +func (f *fakeCDPServer) sendToMonitor(t *testing.T, msg any) { + t.Helper() + f.connMu.Lock() + c := f.conn + f.connMu.Unlock() + require.NotNil(t, c, "no active connection") + err := wsjson.Write(context.Background(), c, msg) + require.NoError(t, err) +} + +// readFromMonitor blocks until the Monitor sends a message (with timeout). +func (f *fakeCDPServer) readFromMonitor(t *testing.T, timeout time.Duration) cdpMessage { + t.Helper() + select { + case b := <-f.msgCh: + var msg cdpMessage + require.NoError(t, json.Unmarshal(b, &msg)) + return msg + case <-time.After(timeout): + t.Fatal("timeout waiting for message from Monitor") + return cdpMessage{} + } +} + +func (f *fakeCDPServer) close() { + f.connMu.Lock() + if f.conn != nil { + _ = f.conn.Close(websocket.StatusNormalClosure, "done") + } + f.connMu.Unlock() + f.srv.Close() +} + +// fakeUpstream implements UpstreamProvider for tests. 
+type fakeUpstream struct { + mu sync.Mutex + current string + subs []chan string +} + +func newFakeUpstream(url string) *fakeUpstream { + return &fakeUpstream{current: url} +} + +func (f *fakeUpstream) Current() string { + f.mu.Lock() + defer f.mu.Unlock() + return f.current +} + +func (f *fakeUpstream) Subscribe() (<-chan string, func()) { + ch := make(chan string, 1) + f.mu.Lock() + f.subs = append(f.subs, ch) + f.mu.Unlock() + cancel := func() { + f.mu.Lock() + for i, s := range f.subs { + if s == ch { + f.subs = append(f.subs[:i], f.subs[i+1:]...) + break + } + } + f.mu.Unlock() + close(ch) + } + return ch, cancel +} + +// notifyRestart simulates Chrome restarting with a new DevTools URL. +func (f *fakeUpstream) notifyRestart(newURL string) { + f.mu.Lock() + f.current = newURL + subs := make([]chan string, len(f.subs)) + copy(subs, f.subs) + f.mu.Unlock() + for _, ch := range subs { + select { + case ch <- newURL: + default: + } + } +} + +// --- Tests --- + +// TestMonitorStart verifies that Monitor.Start() dials the URL from +// UpstreamProvider.Current() and establishes an isolated WebSocket connection. +func TestMonitorStart(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + upstream := newFakeUpstream(srv.wsURL()) + var published []events.Event + var publishMu sync.Mutex + publishFn := func(ev events.Event) { + publishMu.Lock() + published = append(published, ev) + publishMu.Unlock() + } + + m := New(upstream, publishFn, 99) + + ctx := context.Background() + err := m.Start(ctx) + require.NoError(t, err) + defer m.Stop() + + // Give readLoop time to start and send the setAutoAttach command. + // We just verify the connection was made and the Monitor is running. + assert.True(t, m.IsRunning()) + + // Read the first message sent by the Monitor — it should be Target.setAutoAttach. 
+ msg := srv.readFromMonitor(t, 3*time.Second) + assert.Equal(t, "Target.setAutoAttach", msg.Method) +} + +// TestAutoAttach verifies that after Start(), the Monitor sends +// Target.setAutoAttach{autoAttach:true, waitForDebuggerOnStart:false, flatten:true} +// and that on receiving Target.attachedToTarget the session is stored. +func TestAutoAttach(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + upstream := newFakeUpstream(srv.wsURL()) + publishFn := func(ev events.Event) {} + + m := New(upstream, publishFn, 99) + + ctx := context.Background() + err := m.Start(ctx) + require.NoError(t, err) + defer m.Stop() + + // Read the setAutoAttach request from the Monitor. + msg := srv.readFromMonitor(t, 3*time.Second) + assert.Equal(t, "Target.setAutoAttach", msg.Method) + + var params struct { + AutoAttach bool `json:"autoAttach"` + WaitForDebuggerOnStart bool `json:"waitForDebuggerOnStart"` + Flatten bool `json:"flatten"` + } + require.NoError(t, json.Unmarshal(msg.Params, ¶ms)) + assert.True(t, params.AutoAttach) + assert.False(t, params.WaitForDebuggerOnStart) + assert.True(t, params.Flatten) + + // Acknowledge the command with a response. + srv.sendToMonitor(t, map[string]any{ + "id": msg.ID, + "result": map[string]any{}, + }) + + // Drain any domain-enable commands sent after setAutoAttach. + // The Monitor calls enableDomains (Runtime.enable, Network.enable, Page.enable, DOM.enable). + drainTimeout := time.NewTimer(500 * time.Millisecond) + for { + select { + case b := <-srv.msgCh: + var m2 cdpMessage + _ = json.Unmarshal(b, &m2) + // respond to enable commands + srv.connMu.Lock() + c := srv.conn + srv.connMu.Unlock() + if c != nil && m2.ID != 0 { + _ = wsjson.Write(context.Background(), c, map[string]any{ + "id": m2.ID, + "result": map[string]any{}, + }) + } + case <-drainTimeout.C: + goto afterDrain + } + } +afterDrain: + + // Now simulate Target.attachedToTarget event. 
+ const testSessionID = "session-abc-123" + const testTargetID = "target-xyz-456" + srv.sendToMonitor(t, map[string]any{ + "method": "Target.attachedToTarget", + "params": map[string]any{ + "sessionId": testSessionID, + "targetInfo": map[string]any{ + "targetId": testTargetID, + "type": "page", + "url": "https://example.com", + }, + }, + }) + + // Give the Monitor time to process the event and store the session. + require.Eventually(t, func() bool { + m.sessionsMu.RLock() + defer m.sessionsMu.RUnlock() + _, ok := m.sessions[testSessionID] + return ok + }, 2*time.Second, 50*time.Millisecond, "session not stored after attachedToTarget") + + m.sessionsMu.RLock() + info := m.sessions[testSessionID] + m.sessionsMu.RUnlock() + assert.Equal(t, testTargetID, info.targetID) + assert.Equal(t, "page", info.targetType) +} + +// TestLifecycle verifies the idle→running→stopped→restart state machine. +func TestLifecycle(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + upstream := newFakeUpstream(srv.wsURL()) + publishFn := func(ev events.Event) {} + + m := New(upstream, publishFn, 99) + + // Idle at boot. + assert.False(t, m.IsRunning(), "should be idle at boot") + + ctx := context.Background() + + // First Start. + err := m.Start(ctx) + require.NoError(t, err) + assert.True(t, m.IsRunning(), "should be running after Start") + + // Drain the setAutoAttach message. + select { + case <-srv.msgCh: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for setAutoAttach") + } + + // Stop. + m.Stop() + assert.False(t, m.IsRunning(), "should be stopped after Stop") + + // Second Start while stopped — should start fresh. + err = m.Start(ctx) + require.NoError(t, err) + assert.True(t, m.IsRunning(), "should be running after second Start") + + // Drain the setAutoAttach message for the second start. 
+ select { + case <-srv.msgCh: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for setAutoAttach on second start") + } + + // Second Start while already running — stop+restart. + err = m.Start(ctx) + require.NoError(t, err) + assert.True(t, m.IsRunning(), "should be running after stop+restart") + + m.Stop() + assert.False(t, m.IsRunning(), "should be stopped at end") +} + +// TestReconnect verifies that when UpstreamManager emits a new URL (Chrome restart), +// the monitor emits monitor_disconnected, reconnects, and emits monitor_reconnected. +func TestReconnect(t *testing.T) { + srv1 := newFakeCDPServer(t) + + upstream := newFakeUpstream(srv1.wsURL()) + + var published []events.Event + var publishMu sync.Mutex + var publishCount atomic.Int32 + publishFn := func(ev events.Event) { + publishMu.Lock() + published = append(published, ev) + publishMu.Unlock() + publishCount.Add(1) + } + + m := New(upstream, publishFn, 99) + + ctx := context.Background() + err := m.Start(ctx) + require.NoError(t, err) + defer m.Stop() + + // Drain setAutoAttach from srv1. + select { + case <-srv1.msgCh: + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for initial setAutoAttach") + } + + // Set up srv2 as the new Chrome URL. + srv2 := newFakeCDPServer(t) + defer srv2.close() + defer srv1.close() + + // Trigger Chrome restart notification. + upstream.notifyRestart(srv2.wsURL()) + + // Wait for monitor_disconnected event. + require.Eventually(t, func() bool { + publishMu.Lock() + defer publishMu.Unlock() + for _, ev := range published { + if ev.Type == "monitor_disconnected" { + return true + } + } + return false + }, 3*time.Second, 50*time.Millisecond, "monitor_disconnected not published") + + // Wait for the Monitor to connect to srv2 and send setAutoAttach. 
+ select { + case <-srv2.msgCh: + // setAutoAttach received on srv2 + case <-time.After(5*time.Second): + t.Fatal("timeout waiting for setAutoAttach on srv2 after reconnect") + } + + // Wait for monitor_reconnected event. + require.Eventually(t, func() bool { + publishMu.Lock() + defer publishMu.Unlock() + for _, ev := range published { + if ev.Type == "monitor_reconnected" { + return true + } + } + return false + }, 3*time.Second, 50*time.Millisecond, "monitor_reconnected not published") + + // Verify monitor_reconnected contains reconnect_duration_ms. + publishMu.Lock() + var reconnEv events.Event + for _, ev := range published { + if ev.Type == "monitor_reconnected" { + reconnEv = ev + break + } + } + publishMu.Unlock() + + require.NotEmpty(t, reconnEv.Type) + var data map[string]any + require.NoError(t, json.Unmarshal(reconnEv.Data, &data)) + _, hasField := data["reconnect_duration_ms"] + assert.True(t, hasField, "monitor_reconnected missing reconnect_duration_ms field") +} + +// listenAndRespondAll drains srv.msgCh and responds with empty results until stopCh is closed. +func listenAndRespondAll(srv *fakeCDPServer, stopCh <-chan struct{}) { + for { + select { + case b := <-srv.msgCh: + var msg cdpMessage + if err := json.Unmarshal(b, &msg); err != nil { + continue + } + if msg.ID == 0 { + continue + } + srv.connMu.Lock() + c := srv.conn + srv.connMu.Unlock() + if c != nil { + _ = wsjson.Write(context.Background(), c, map[string]any{ + "id": msg.ID, + "result": map[string]any{}, + }) + } + case <-stopCh: + return + } + } +} + + +// startMonitorWithFakeServer is a helper that starts a monitor against a fake CDP server, +// drains the initial setAutoAttach + domain-enable commands, and returns a cleanup func. 
func startMonitorWithFakeServer(t *testing.T, srv *fakeCDPServer) (*Monitor, *[]events.Event, *sync.Mutex, func()) {
	t.Helper()
	// Collected events; guarded by mu and shared with the caller by pointer.
	published := make([]events.Event, 0, 32)
	var mu sync.Mutex
	publishFn := func(ev events.Event) {
		mu.Lock()
		published = append(published, ev)
		mu.Unlock()
	}
	upstream := newFakeUpstream(srv.wsURL())
	// NOTE(review): 99 looks like an X display number (cf. screenshotFn's displayNum) — confirm.
	m := New(upstream, publishFn, 99)
	ctx := context.Background()
	require.NoError(t, m.Start(ctx))

	// Ack every command the monitor sends so its init sequence (setAutoAttach,
	// domain enables) can complete.
	stopResponder := make(chan struct{})
	go listenAndRespondAll(srv, stopResponder)

	// cleanup stops the responder first, then the monitor (Stop blocks until
	// the read goroutine exits).
	cleanup := func() {
		close(stopResponder)
		m.Stop()
	}
	// Wait until the fake server has an active connection.
	require.Eventually(t, func() bool {
		srv.connMu.Lock()
		defer srv.connMu.Unlock()
		return srv.conn != nil
	}, 3*time.Second, 20*time.Millisecond, "fake server never received a connection")
	// Allow the readLoop and init commands to settle before sending test events.
	time.Sleep(150 * time.Millisecond)
	return m, &published, &mu, cleanup
}

// waitForEvent blocks until an event of the given type is published, or times out.
func waitForEvent(t *testing.T, published *[]events.Event, mu *sync.Mutex, eventType string, timeout time.Duration) events.Event {
	t.Helper()
	deadline := time.Now().Add(timeout)
	// Poll the shared slice under the lock every 20ms until a match appears.
	for time.Now().Before(deadline) {
		mu.Lock()
		for _, ev := range *published {
			if ev.Type == eventType {
				mu.Unlock()
				return ev
			}
		}
		mu.Unlock()
		time.Sleep(20 * time.Millisecond)
	}
	t.Fatalf("timeout waiting for event type=%q", eventType)
	// Unreachable after Fatalf; satisfies the return requirement.
	return events.Event{}
}

// TestConsoleEvents verifies console_log, console_error, and [KERNEL_EVENT] sentinel routing.
func TestConsoleEvents(t *testing.T) {
	srv := newFakeCDPServer(t)
	defer srv.close()

	_, published, mu, cleanup := startMonitorWithFakeServer(t, srv)
	defer cleanup()

	// 1. consoleAPICalled → console_log
	srv.sendToMonitor(t, map[string]any{
		"method": "Runtime.consoleAPICalled",
		"params": map[string]any{
			"type": "log",
			"args": []any{map[string]any{"type": "string", "value": "hello world"}},
			"executionContextId": 1,
		},
	})
	ev := waitForEvent(t, published, mu, "console_log", 2*time.Second)
	assert.Equal(t, events.CategoryConsole, ev.Category)
	assert.Equal(t, events.KindCDP, ev.Source.Kind)
	assert.Equal(t, "Runtime.consoleAPICalled", ev.Source.Event)
	assert.Equal(t, events.DetailStandard, ev.DetailLevel)
	var data map[string]any
	require.NoError(t, json.Unmarshal(ev.Data, &data))
	assert.Equal(t, "log", data["level"])
	assert.Equal(t, "hello world", data["text"])

	// 2. exceptionThrown → console_error
	srv.sendToMonitor(t, map[string]any{
		"method": "Runtime.exceptionThrown",
		"params": map[string]any{
			"timestamp": 1234.5,
			"exceptionDetails": map[string]any{
				"text": "Uncaught TypeError",
				"lineNumber": 42,
				"columnNumber": 7,
				"url": "https://example.com/app.js",
			},
		},
	})
	ev2 := waitForEvent(t, published, mu, "console_error", 2*time.Second)
	assert.Equal(t, events.CategoryConsole, ev2.Category)
	assert.Equal(t, events.KindCDP, ev2.Source.Kind)
	assert.Equal(t, "Runtime.exceptionThrown", ev2.Source.Event)
	assert.Equal(t, events.DetailStandard, ev2.DetailLevel)
	var data2 map[string]any
	require.NoError(t, json.Unmarshal(ev2.Data, &data2))
	assert.Equal(t, "Uncaught TypeError", data2["text"])
	// CDP lineNumber/columnNumber come back as JSON numbers (float64 in a map).
	assert.Equal(t, float64(42), data2["line"])
	assert.Equal(t, float64(7), data2["column"])

	// 3. Runtime.bindingCalled → interaction_click (via __kernelEvent binding)
	srv.sendToMonitor(t, map[string]any{
		"method": "Runtime.bindingCalled",
		"params": map[string]any{
			"name": "__kernelEvent",
			"payload": `{"type":"interaction_click","x":10,"y":20,"selector":"button","tag":"BUTTON","text":"OK"}`,
		},
	})
	ev3 := waitForEvent(t, published, mu, "interaction_click", 2*time.Second)
	assert.Equal(t, events.CategoryInteraction, ev3.Category)
	assert.Equal(t, "Runtime.bindingCalled", ev3.Source.Event)
}

// TestNetworkEvents verifies network_request, network_response, and network_loading_failed.
func TestNetworkEvents(t *testing.T) {
	srv := newFakeCDPServer(t)
	defer srv.close()

	published := make([]events.Event, 0, 32)
	var mu sync.Mutex
	upstream := newFakeUpstream(srv.wsURL())
	m := New(upstream, func(ev events.Event) {
		mu.Lock()
		published = append(published, ev)
		mu.Unlock()
	}, 99)
	ctx := context.Background()
	require.NoError(t, m.Start(ctx))
	defer m.Stop()

	// Responder goroutine: answer all commands from the monitor.
	// For Network.getResponseBody, return a real body; for everything else return {}.
	// (This test cannot use startMonitorWithFakeServer because it needs this
	// custom getResponseBody reply.)
	stopResponder := make(chan struct{})
	defer close(stopResponder)
	go func() {
		for {
			select {
			case b := <-srv.msgCh:
				var msg cdpMessage
				if err := json.Unmarshal(b, &msg); err != nil {
					continue
				}
				if msg.ID == 0 {
					continue
				}
				srv.connMu.Lock()
				c := srv.conn
				srv.connMu.Unlock()
				if c == nil {
					continue
				}
				var resp any
				if msg.Method == "Network.getResponseBody" {
					resp = map[string]any{
						"id": msg.ID,
						"result": map[string]any{"body": `{"ok":true}`, "base64Encoded": false},
					}
				} else {
					resp = map[string]any{"id": msg.ID, "result": map[string]any{}}
				}
				_ = wsjson.Write(context.Background(), c, resp)
			case <-stopResponder:
				return
			}
		}
	}()

	// Wait for connection.
	require.Eventually(t, func() bool {
		srv.connMu.Lock()
		defer srv.connMu.Unlock()
		return srv.conn != nil
	}, 3*time.Second, 20*time.Millisecond)
	time.Sleep(150 * time.Millisecond)

	const reqID = "req-001"

	// 1. requestWillBeSent → network_request
	srv.sendToMonitor(t, map[string]any{
		"method": "Network.requestWillBeSent",
		"params": map[string]any{
			"requestId": reqID,
			"resourceType": "XHR",
			"request": map[string]any{
				"method": "POST",
				"url": "https://api.example.com/data",
				"headers": map[string]any{"Content-Type": "application/json"},
			},
			"initiator": map[string]any{"type": "script"},
		},
	})
	ev := waitForEvent(t, &published, &mu, "network_request", 2*time.Second)
	assert.Equal(t, events.CategoryNetwork, ev.Category)
	assert.Equal(t, events.KindCDP, ev.Source.Kind)
	assert.Equal(t, "Network.requestWillBeSent", ev.Source.Event)
	var data map[string]any
	require.NoError(t, json.Unmarshal(ev.Data, &data))
	assert.Equal(t, "POST", data["method"])
	assert.Equal(t, "https://api.example.com/data", data["url"])

	// 2. responseReceived + loadingFinished → network_response (with body via getResponseBody)
	srv.sendToMonitor(t, map[string]any{
		"method": "Network.responseReceived",
		"params": map[string]any{
			"requestId": reqID,
			"response": map[string]any{
				"status": 200,
				"statusText": "OK",
				"url": "https://api.example.com/data",
				"headers": map[string]any{"Content-Type": "application/json"},
				"mimeType": "application/json",
			},
		},
	})
	srv.sendToMonitor(t, map[string]any{
		"method": "Network.loadingFinished",
		"params": map[string]any{
			"requestId": reqID,
		},
	})

	ev2 := waitForEvent(t, &published, &mu, "network_response", 3*time.Second)
	assert.Equal(t, events.CategoryNetwork, ev2.Category)
	assert.Equal(t, "Network.loadingFinished", ev2.Source.Event)
	var data2 map[string]any
	require.NoError(t, json.Unmarshal(ev2.Data, &data2))
	assert.Equal(t, float64(200), data2["status"])
	assert.NotEmpty(t, data2["body"])

	// 3. loadingFailed → network_loading_failed
	const reqID2 = "req-002"
	srv.sendToMonitor(t, map[string]any{
		"method": "Network.requestWillBeSent",
		"params": map[string]any{
			"requestId": reqID2,
			"request": map[string]any{
				"method": "GET",
				"url": "https://fail.example.com/",
			},
		},
	})
	waitForEvent(t, &published, &mu, "network_request", 2*time.Second)

	// Reset the collected events so the next wait cannot match the
	// network_request we just consumed.
	mu.Lock()
	published = published[:0]
	mu.Unlock()

	srv.sendToMonitor(t, map[string]any{
		"method": "Network.loadingFailed",
		"params": map[string]any{
			"requestId": reqID2,
			"errorText": "net::ERR_CONNECTION_REFUSED",
			"canceled": false,
		},
	})
	ev3 := waitForEvent(t, &published, &mu, "network_loading_failed", 2*time.Second)
	assert.Equal(t, events.CategoryNetwork, ev3.Category)
	var data3 map[string]any
	require.NoError(t, json.Unmarshal(ev3.Data, &data3))
	assert.Equal(t, "net::ERR_CONNECTION_REFUSED", data3["error_text"])
}

// TestPageEvents verifies navigation, dom_content_loaded, page_load, and dom_updated.
+func TestPageEvents(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + defer cleanup() + + // frameNavigated → navigation + srv.sendToMonitor(t, map[string]any{ + "method": "Page.frameNavigated", + "params": map[string]any{ + "frame": map[string]any{ + "id": "frame-1", + "url": "https://example.com/page", + }, + }, + }) + ev := waitForEvent(t, published, mu, "navigation", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "Page.frameNavigated", ev.Source.Event) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "https://example.com/page", data["url"]) + + // domContentEventFired → dom_content_loaded + srv.sendToMonitor(t, map[string]any{ + "method": "Page.domContentEventFired", + "params": map[string]any{"timestamp": 1000.0}, + }) + ev2 := waitForEvent(t, published, mu, "dom_content_loaded", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev2.Category) + + // loadEventFired → page_load + srv.sendToMonitor(t, map[string]any{ + "method": "Page.loadEventFired", + "params": map[string]any{"timestamp": 1001.0}, + }) + ev3 := waitForEvent(t, published, mu, "page_load", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev3.Category) + + // documentUpdated → dom_updated + srv.sendToMonitor(t, map[string]any{ + "method": "DOM.documentUpdated", + "params": map[string]any{}, + }) + ev4 := waitForEvent(t, published, mu, "dom_updated", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev4.Category) +} + +// TestTargetEvents verifies target_created and target_destroyed. 
+func TestTargetEvents(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + defer cleanup() + + // targetCreated → target_created + srv.sendToMonitor(t, map[string]any{ + "method": "Target.targetCreated", + "params": map[string]any{ + "targetInfo": map[string]any{ + "targetId": "target-1", + "type": "page", + "url": "https://new.example.com", + }, + }, + }) + ev := waitForEvent(t, published, mu, "target_created", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "Target.targetCreated", ev.Source.Event) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "target-1", data["target_id"]) + + // targetDestroyed → target_destroyed + srv.sendToMonitor(t, map[string]any{ + "method": "Target.targetDestroyed", + "params": map[string]any{ + "targetId": "target-1", + }, + }) + ev2 := waitForEvent(t, published, mu, "target_destroyed", 2*time.Second) + assert.Equal(t, events.CategoryPage, ev2.Category) + var data2 map[string]any + require.NoError(t, json.Unmarshal(ev2.Data, &data2)) + assert.Equal(t, "target-1", data2["target_id"]) +} + +// TestBindingAndTimeline verifies that scroll_settled arrives via +// Runtime.bindingCalled and layout_shift arrives via PerformanceTimeline. 
+func TestBindingAndTimeline(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + defer cleanup() + + // scroll_settled via Runtime.bindingCalled + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.bindingCalled", + "params": map[string]any{ + "name": "__kernelEvent", + "payload": `{"type":"scroll_settled","from_x":0,"from_y":0,"to_x":0,"to_y":500,"target_selector":"body"}`, + }, + }) + ev := waitForEvent(t, published, mu, "scroll_settled", 2*time.Second) + assert.Equal(t, events.CategoryInteraction, ev.Category) + assert.Equal(t, "Runtime.bindingCalled", ev.Source.Event) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, float64(500), data["to_y"]) + + // layout_shift via PerformanceTimeline.timelineEventAdded + srv.sendToMonitor(t, map[string]any{ + "method": "PerformanceTimeline.timelineEventAdded", + "params": map[string]any{ + "event": map[string]any{ + "type": "layout-shift", + }, + }, + }) + ev2 := waitForEvent(t, published, mu, "layout_shift", 2*time.Second) + assert.Equal(t, events.KindCDP, ev2.Source.Kind) + assert.Equal(t, "PerformanceTimeline.timelineEventAdded", ev2.Source.Event) + + noEventWithin(t, published, mu, "console_log", 100*time.Millisecond) +} + +// TestScreenshot verifies rate limiting and the screenshotFn testable seam. +func TestScreenshot(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + m, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + defer cleanup() + + // Inject a mock screenshotFn that returns a tiny valid PNG. 
+ var captureCount atomic.Int32 + // 1x1 white PNG (minimal valid PNG bytes) + minimalPNG := []byte{ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature + 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk length + type + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // width=1, height=1 + 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, // bit depth=8, color type=2, ... + 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, // IDAT chunk + 0x54, 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x01, 0xe2, 0x21, 0xbc, + 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, // IEND chunk + 0x44, 0xae, 0x42, 0x60, 0x82, + } + m.screenshotFn = func(ctx context.Context, displayNum int) ([]byte, error) { + captureCount.Add(1) + return minimalPNG, nil + } + + // First maybeScreenshot call — should capture. + ctx := context.Background() + m.maybeScreenshot(ctx) + // Give the goroutine time to run. + require.Eventually(t, func() bool { + return captureCount.Load() == 1 + }, 2*time.Second, 20*time.Millisecond) + + // Second call immediately after — should be rate-limited (no capture). + m.maybeScreenshot(ctx) + time.Sleep(100 * time.Millisecond) + assert.Equal(t, int32(1), captureCount.Load(), "second call within 2s should be rate-limited") + + // Verify screenshot event was published with png field. + ev := waitForEvent(t, published, mu, "screenshot", 2*time.Second) + assert.Equal(t, events.CategorySystem, ev.Category) + assert.Equal(t, events.KindLocalProcess, ev.Source.Kind) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.NotEmpty(t, data["png"]) + + // Fast-forward lastScreenshotAt to simulate 2s+ elapsed. 
+ m.lastScreenshotAt.Store(time.Now().Add(-3 * time.Second).UnixMilli()) + m.maybeScreenshot(ctx) + require.Eventually(t, func() bool { + return captureCount.Load() == 2 + }, 2*time.Second, 20*time.Millisecond) +} + +// --- Computed meta-event tests --- + +// newComputedMonitor creates a Monitor with a capture function and returns +// the published events slice and its mutex for inspection. +func newComputedMonitor(t *testing.T) (*Monitor, *[]events.Event, *sync.Mutex) { + t.Helper() + var mu sync.Mutex + published := make([]events.Event, 0) + publishFn := func(ev events.Event) { + mu.Lock() + published = append(published, ev) + mu.Unlock() + } + upstream := newFakeUpstream("ws://127.0.0.1:0") // not used; no real dial + m := New(upstream, publishFn, 0) + return m, &published, &mu +} + + +// noEventWithin asserts that no event of the given type is published within d. +func noEventWithin(t *testing.T, published *[]events.Event, mu *sync.Mutex, eventType string, d time.Duration) { + t.Helper() + deadline := time.Now().Add(d) + for time.Now().Before(deadline) { + mu.Lock() + for _, ev := range *published { + if ev.Type == eventType { + mu.Unlock() + t.Fatalf("unexpected event %q published", eventType) + } + } + mu.Unlock() + time.Sleep(10 * time.Millisecond) + } +} + +// TestNetworkIdle verifies the 500ms debounce for network_idle. +func TestNetworkIdle(t *testing.T) { + m, published, mu := newComputedMonitor(t) + + // Simulate navigation (resets computed state). + navParams, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m.handleFrameNavigated(navParams, "s1") + // Drain the navigation event from published. + + // Helper to send requestWillBeSent. 
+ sendReq := func(id string) { + p, _ := json.Marshal(map[string]any{ + "requestId": id, + "resourceType": "Document", + "request": map[string]any{"method": "GET", "url": "https://example.com/" + id}, + }) + m.handleNetworkRequest(p, "s1") + } + // Helper to send loadingFinished. + sendFinished := func(id string) { + // store minimal state so LoadAndDelete finds it + m.pendReqMu.Lock() + m.pendingRequests[id] = networkReqState{method: "GET", url: "https://example.com/" + id} + m.pendReqMu.Unlock() + p, _ := json.Marshal(map[string]any{"requestId": id}) + m.handleLoadingFinished(p, "s1") + } + + // Send 3 requests, then finish them all. + sendReq("r1") + sendReq("r2") + sendReq("r3") + + t0 := time.Now() + sendFinished("r1") + sendFinished("r2") + sendFinished("r3") + + // network_idle should fire ~500ms after the last loadingFinished. + ev := waitForEvent(t,published, mu, "network_idle", 2*time.Second) + elapsed := time.Since(t0) + assert.GreaterOrEqual(t, elapsed.Milliseconds(), int64(400), "network_idle fired too early") + assert.Equal(t, events.CategoryNetwork, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "", ev.Source.Event) + + // --- Timer reset test: new request within 500ms resets the clock --- + m2, published2, mu2 := newComputedMonitor(t) + navParams2, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m2.handleFrameNavigated(navParams2, "s1") + + sendReq2 := func(id string) { + p, _ := json.Marshal(map[string]any{ + "requestId": id, + "resourceType": "Document", + "request": map[string]any{"method": "GET", "url": "https://example.com/" + id}, + }) + m2.handleNetworkRequest(p, "s1") + } + sendFinished2 := func(id string) { + m2.pendReqMu.Lock() + m2.pendingRequests[id] = networkReqState{method: "GET", url: "https://example.com/" + id} + m2.pendReqMu.Unlock() + p, _ := json.Marshal(map[string]any{"requestId": id}) + m2.handleLoadingFinished(p, "s1") + } + + 
sendReq2("a1") + sendFinished2("a1") + // 200ms later, a new request starts (timer should reset) + time.Sleep(200 * time.Millisecond) + sendReq2("a2") + t1 := time.Now() + sendFinished2("a2") + + ev2 := waitForEvent(t,published2, mu2, "network_idle", 2*time.Second) + elapsed2 := time.Since(t1) + // Should fire ~500ms after a2 finished, not 500ms after a1 + assert.GreaterOrEqual(t, elapsed2.Milliseconds(), int64(400), "network_idle should reset timer on new request") + assert.Equal(t, events.CategoryNetwork, ev2.Category) +} + +// TestLayoutSettled verifies the 1s debounce for layout_settled. +func TestLayoutSettled(t *testing.T) { + m, published, mu := newComputedMonitor(t) + + // Navigate to reset state. + navParams, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m.handleFrameNavigated(navParams, "s1") + + // Simulate page_load (Page.loadEventFired). + // We bypass the ffmpeg screenshot side-effect by keeping screenshotFn nil-safe. + t0 := time.Now() + m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + + // layout_settled should fire ~1s after page_load (no layout shifts). + ev := waitForEvent(t,published, mu, "layout_settled", 3*time.Second) + elapsed := time.Since(t0) + assert.GreaterOrEqual(t, elapsed.Milliseconds(), int64(900), "layout_settled fired too early") + assert.Equal(t, events.CategoryPage, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "", ev.Source.Event) + + // --- Layout shift resets the timer --- + m2, published2, mu2 := newComputedMonitor(t) + navParams2, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m2.handleFrameNavigated(navParams2, "s1") + m2.handleLoadEventFired(json.RawMessage(`{}`), "s1") + + // Simulate a native CDP layout shift at 600ms. 
+ time.Sleep(600 * time.Millisecond) + shiftParams, _ := json.Marshal(map[string]any{ + "event": map[string]any{"type": "layout-shift"}, + }) + m2.handleTimelineEvent(shiftParams, "s1") + t1 := time.Now() + + // layout_settled fires ~1s after the shift, not 1s after page_load. + ev2 := waitForEvent(t,published2, mu2, "layout_settled", 3*time.Second) + elapsed2 := time.Since(t1) + assert.GreaterOrEqual(t, elapsed2.Milliseconds(), int64(900), "layout_settled should reset after layout_shift") + assert.Equal(t, events.CategoryPage, ev2.Category) +} + +// TestScrollSettled verifies that a scroll_settled sentinel from JS is passed through. +func TestScrollSettled(t *testing.T) { + m, published, mu := newComputedMonitor(t) + + // Simulate scroll_settled via Runtime.bindingCalled. + bindingParams, _ := json.Marshal(map[string]any{ + "name": "__kernelEvent", + "payload": `{"type":"scroll_settled"}`, + }) + m.handleBindingCalled(bindingParams, "s1") + + ev := waitForEvent(t,published, mu, "scroll_settled", 1*time.Second) + assert.Equal(t, events.CategoryInteraction, ev.Category) +} + +// TestNavigationSettled verifies the three-flag gate for navigation_settled. +func TestNavigationSettled(t *testing.T) { + m, published, mu := newComputedMonitor(t) + + // Navigate to initialise flags. + navParams, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m.handleFrameNavigated(navParams, "s1") + + // Trigger dom_content_loaded. + m.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") + + // Trigger network_idle via load cycle. 
+ reqP, _ := json.Marshal(map[string]any{ + "requestId": "r1", "resourceType": "Document", + "request": map[string]any{"method": "GET", "url": "https://example.com/r1"}, + }) + m.handleNetworkRequest(reqP, "s1") + m.pendReqMu.Lock() + m.pendingRequests["r1"] = networkReqState{method: "GET", url: "https://example.com/r1"} + m.pendReqMu.Unlock() + finP, _ := json.Marshal(map[string]any{"requestId": "r1"}) + m.handleLoadingFinished(finP, "s1") + + // Trigger layout_settled via page_load (1s timer). + m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + + // Wait for navigation_settled (all three flags set). + ev := waitForEvent(t,published, mu, "navigation_settled", 3*time.Second) + assert.Equal(t, events.CategoryPage, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "", ev.Source.Event) + + // --- Navigation interrupt test --- + m2, published2, mu2 := newComputedMonitor(t) + + navP1, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + }) + m2.handleFrameNavigated(navP1, "s1") + + // Start sequence: dom_content_loaded + network_idle. + m2.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") + reqP2, _ := json.Marshal(map[string]any{ + "requestId": "r2", "resourceType": "Document", + "request": map[string]any{"method": "GET", "url": "https://example.com/r2"}, + }) + m2.handleNetworkRequest(reqP2, "s1") + m2.pendReqMu.Lock() + m2.pendingRequests["r2"] = networkReqState{method: "GET", url: "https://example.com/r2"} + m2.pendReqMu.Unlock() + finP2, _ := json.Marshal(map[string]any{"requestId": "r2"}) + m2.handleLoadingFinished(finP2, "s1") + + // Interrupt with a new navigation before layout_settled fires. + navP2, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com/page2"}, + }) + m2.handleFrameNavigated(navP2, "s1") + + // navigation_settled should NOT fire for the interrupted sequence. 
+ noEventWithin(t, published2, mu2, "navigation_settled", 1500*time.Millisecond) + _ = mu2 // suppress unused warning +} From 36b1484bc2c4ce089fa858678d233f0cbea67dab Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Thu, 2 Apr 2026 12:05:28 +0000 Subject: [PATCH 25/27] review: create util.go for helper funcs --- server/lib/cdpmonitor/computed.go | 11 +++- server/lib/cdpmonitor/handlers.go | 85 ++++++++++++++-------------- server/lib/cdpmonitor/monitor.go | 90 ++++++++++++++++++++++++------ server/lib/cdpmonitor/types.go | 1 - server/lib/cdpmonitor/util.go | 92 +++++++++++++++++++++++++++++++ 5 files changed, 213 insertions(+), 66 deletions(-) create mode 100644 server/lib/cdpmonitor/util.go diff --git a/server/lib/cdpmonitor/computed.go b/server/lib/cdpmonitor/computed.go index c753730f..1bbe4573 100644 --- a/server/lib/cdpmonitor/computed.go +++ b/server/lib/cdpmonitor/computed.go @@ -7,6 +7,11 @@ import ( "github.com/onkernel/kernel-images/server/lib/events" ) +const ( + networkIdleDebounce = 500 * time.Millisecond + layoutSettledDebounce = 1 * time.Second +) + // computedState holds the mutable state for all computed meta-events. type computedState struct { mu sync.Mutex @@ -91,7 +96,7 @@ func (s *computedState) onLoadingFinished() { } // All requests done and not yet fired — start 500 ms debounce timer. stopTimer(s.netTimer) - s.netTimer = time.AfterFunc(500*time.Millisecond, func() { + s.netTimer = time.AfterFunc(networkIdleDebounce, func() { s.mu.Lock() defer s.mu.Unlock() if s.netFired || s.netPending > 0 { @@ -121,7 +126,7 @@ func (s *computedState) onPageLoad() { } // Start the 1 s layout_settled timer. stopTimer(s.layoutTimer) - s.layoutTimer = time.AfterFunc(1*time.Second, s.emitLayoutSettled) + s.layoutTimer = time.AfterFunc(layoutSettledDebounce, s.emitLayoutSettled) } // onLayoutShift is called when a layout_shift sentinel arrives from injected JS. @@ -133,7 +138,7 @@ func (s *computedState) onLayoutShift() { } // Reset the timer to 1 s from now. 
stopTimer(s.layoutTimer) - s.layoutTimer = time.AfterFunc(1*time.Second, s.emitLayoutSettled) + s.layoutTimer = time.AfterFunc(layoutSettledDebounce, s.emitLayoutSettled) } // emitLayoutSettled is called from the layout timer's AfterFunc goroutine diff --git a/server/lib/cdpmonitor/handlers.go b/server/lib/cdpmonitor/handlers.go index 3501f50a..7450dc1c 100644 --- a/server/lib/cdpmonitor/handlers.go +++ b/server/lib/cdpmonitor/handlers.go @@ -3,12 +3,11 @@ package cdpmonitor import ( "encoding/json" "time" - "unicode/utf8" "github.com/onkernel/kernel-images/server/lib/events" ) -// publishEvent stamps common fields and publishes an Event. +// publishEvent stamps common fields and publishes an event. func (m *Monitor) publishEvent(eventType string, source events.Source, sourceEvent string, data json.RawMessage, sessionID string) { src := source src.Event = sourceEvent @@ -103,7 +102,7 @@ func (m *Monitor) handleExceptionThrown(params json.RawMessage, sessionID string go m.maybeScreenshot(m.lifecycleCtx) } -// handleBindingCalled processes __kernelEvent binding calls. +// handleBindingCalled processes __kernelEvent binding calls from the page. func (m *Monitor) handleBindingCalled(params json.RawMessage, sessionID string) { var p struct { Name string `json:"name"` @@ -128,7 +127,7 @@ func (m *Monitor) handleBindingCalled(params json.RawMessage, sessionID string) } } -// handleTimelineEvent processes layout-shift events from PerformanceTimeline. +// handleTimelineEvent processes PerformanceTimeline layout-shift events. func (m *Monitor) handleTimelineEvent(params json.RawMessage, sessionID string) { var p struct { Event struct { @@ -148,6 +147,15 @@ func (m *Monitor) handleNetworkRequest(params json.RawMessage, sessionID string) if err := json.Unmarshal(params, &p); err != nil { return } + // Extract only the initiator type; the stack trace is too verbose and dominates event size. 
+ var initiatorType string + var raw struct { + Type string `json:"type"` + } + if json.Unmarshal(p.Initiator, &raw) == nil { + initiatorType = raw.Type + } + m.pendReqMu.Lock() m.pendingRequests[p.RequestID] = networkReqState{ method: p.Request.Method, @@ -155,16 +163,15 @@ func (m *Monitor) handleNetworkRequest(params json.RawMessage, sessionID string) headers: p.Request.Headers, postData: p.Request.PostData, resourceType: p.ResourceType, - initiator: p.Initiator, } m.pendReqMu.Unlock() data, _ := json.Marshal(map[string]any{ - "method": p.Request.Method, - "url": p.Request.URL, - "headers": p.Request.Headers, - "post_data": p.Request.PostData, - "resource_type": p.ResourceType, - "initiator": p.Initiator, + "method": p.Request.Method, + "url": p.Request.URL, + "headers": p.Request.Headers, + "post_data": p.Request.PostData, + "resource_type": p.ResourceType, + "initiator_type": initiatorType, }) m.publishEvent("network_request", events.Source{Kind: events.KindCDP}, "Network.requestWillBeSent", data, sessionID) m.computed.onRequest() @@ -202,30 +209,33 @@ func (m *Monitor) handleLoadingFinished(params json.RawMessage, sessionID string if !ok { return } - // Fetch response body async to avoid blocking readLoop. + // Fetch response body async to avoid blocking readLoop; binary types are skipped. 
go func() { ctx := m.lifecycleCtx body := "" - result, err := m.send(ctx, "Network.getResponseBody", map[string]any{ - "requestId": p.RequestID, - }, sessionID) - if err == nil { - var resp struct { - Body string `json:"body"` - Base64Encoded bool `json:"base64Encoded"` - } - if json.Unmarshal(result, &resp) == nil { - body = truncateBody(resp.Body) + if isTextualResource(state.resourceType, state.mimeType) { + result, err := m.send(ctx, "Network.getResponseBody", map[string]any{ + "requestId": p.RequestID, + }, sessionID) + if err == nil { + var resp struct { + Body string `json:"body"` + Base64Encoded bool `json:"base64Encoded"` + } + if json.Unmarshal(result, &resp) == nil { + body = truncateBody(resp.Body, bodyCapFor(state.mimeType)) + } } } data, _ := json.Marshal(map[string]any{ - "method": state.method, - "url": state.url, - "status": state.status, - "status_text": state.statusText, - "headers": state.resHeaders, - "mime_type": state.mimeType, - "body": body, + "method": state.method, + "url": state.url, + "status": state.status, + "status_text": state.statusText, + "headers": state.resHeaders, + "mime_type": state.mimeType, + "resource_type": state.resourceType, + "body": body, }) m.publishEvent("network_response", events.Source{Kind: events.KindCDP}, "Network.loadingFinished", data, sessionID) m.computed.onLoadingFinished() @@ -260,19 +270,6 @@ func (m *Monitor) handleLoadingFailed(params json.RawMessage, sessionID string) m.computed.onLoadingFinished() } -// truncateBody caps body at ~900KB on a valid UTF-8 boundary. -func truncateBody(body string) string { - const maxBody = 900 * 1024 - if len(body) <= maxBody { - return body - } - // Back up to a valid rune boundary. 
- truncated := body[:maxBody] - for !utf8.ValidString(truncated) { - truncated = truncated[:len(truncated)-1] - } - return truncated -} func (m *Monitor) handleFrameNavigated(params json.RawMessage, sessionID string) { var p struct { @@ -314,7 +311,7 @@ func (m *Monitor) handleDOMUpdated(params json.RawMessage, sessionID string) { m.publishEvent("dom_updated", events.Source{Kind: events.KindCDP}, "DOM.documentUpdated", params, sessionID) } -// handleAttachedToTarget stores the session and enables domains + injects script. +// handleAttachedToTarget stores the new session then enables domains and injects script. func (m *Monitor) handleAttachedToTarget(msg cdpMessage) { var params cdpAttachedToTargetParams if err := json.Unmarshal(msg.Params, &params); err != nil { return } @@ -328,7 +325,7 @@ func (m *Monitor) handleAttachedToTarget(msg cdpMessage) { } m.sessionsMu.Unlock() - // Async to avoid blocking readLoop. + // Async to avoid blocking the readLoop. go func() { m.enableDomains(m.lifecycleCtx, params.SessionID) _ = m.injectScript(m.lifecycleCtx, params.SessionID) diff --git a/server/lib/cdpmonitor/monitor.go b/server/lib/cdpmonitor/monitor.go index 886e5946..3151375d 100644 --- a/server/lib/cdpmonitor/monitor.go +++ b/server/lib/cdpmonitor/monitor.go @@ -21,7 +21,11 @@ type UpstreamProvider interface { // PublishFunc publishes an Event to the pipeline. type PublishFunc func(ev events.Event) +const wsReadLimit = 8 * 1024 * 1024 + // Monitor manages a CDP WebSocket connection with auto-attach session fan-out. +// Reusable: Stop followed by Start reconnects cleanly. All exported methods are +// safe to call concurrently. Stop blocks until the read goroutine exits.
type Monitor struct { upstreamMgr UpstreamProvider publish PublishFunc @@ -83,17 +87,20 @@ func (m *Monitor) Start(parentCtx context.Context) error { return fmt.Errorf("cdpmonitor: no DevTools URL available") } - conn, _, err := websocket.Dial(parentCtx, devtoolsURL, nil) + // Use background context so the monitor outlives the caller's request context. + ctx, cancel := context.WithCancel(context.Background()) + + conn, _, err := websocket.Dial(ctx, devtoolsURL, nil) if err != nil { + cancel() return fmt.Errorf("cdpmonitor: dial %s: %w", devtoolsURL, err) } - conn.SetReadLimit(8 * 1024 * 1024) + conn.SetReadLimit(wsReadLimit) m.connMu.Lock() m.conn = conn m.connMu.Unlock() - ctx, cancel := context.WithCancel(parentCtx) m.lifecycleCtx = ctx m.cancel = cancel m.done = make(chan struct{}) @@ -136,19 +143,18 @@ func (m *Monitor) Stop() { m.computed.resetOnNavigation() } -// readLoop reads CDP messages, routing responses to pending callers and -// dispatching events. Exits on connection close; respawned on reconnect. +// readLoop reads CDP messages, routing responses to pending callers and dispatching events. func (m *Monitor) readLoop(ctx context.Context) { defer close(m.done) - for { - m.connMu.Lock() - conn := m.conn - m.connMu.Unlock() - if conn == nil { - return - } + m.connMu.Lock() + conn := m.conn + m.connMu.Unlock() + if conn == nil { + return + } + for { _, b, err := conn.Read(ctx) if err != nil { return @@ -227,8 +233,8 @@ func (m *Monitor) send(ctx context.Context, method string, params any, sessionID } } -// initSession enables CDP domains and injects the interaction-tracking script -// on a fresh connection (called async). +// initSession enables CDP domains, injects the interaction-tracking script, +// and manually attaches to any targets already open when the monitor started. 
func (m *Monitor) initSession(ctx context.Context) { _, _ = m.send(ctx, "Target.setAutoAttach", map[string]any{ "autoAttach": true, @@ -237,17 +243,65 @@ func (m *Monitor) initSession(ctx context.Context) { }, "") m.enableDomains(ctx, "") _ = m.injectScript(ctx, "") + m.attachExistingTargets(ctx) +} + +// attachExistingTargets fetches all open targets and attaches to any that are +// not already tracked. This catches pages that were open before Start() was called. +func (m *Monitor) attachExistingTargets(ctx context.Context) { + result, err := m.send(ctx, "Target.getTargets", nil, "") + if err != nil { + return + } + var resp struct { + TargetInfos []cdpTargetInfo `json:"targetInfos"` + } + if err := json.Unmarshal(result, &resp); err != nil { + return + } + for _, ti := range resp.TargetInfos { + if ti.Type != "page" { + continue + } + m.sessionsMu.RLock() + alreadyAttached := false + for _, info := range m.sessions { + if info.targetID == ti.TargetID { + alreadyAttached = true + break + } + } + m.sessionsMu.RUnlock() + if alreadyAttached { + continue + } + go func(targetID string) { + res, err := m.send(ctx, "Target.attachToTarget", map[string]any{ + "targetId": targetID, + "flatten": true, + }, "") + if err != nil { + return + } + var attached struct { + SessionID string `json:"sessionId"` + } + if json.Unmarshal(res, &attached) == nil && attached.SessionID != "" { + m.enableDomains(ctx, attached.SessionID) + _ = m.injectScript(ctx, attached.SessionID) + } + }(ti.TargetID) + } } -// restartReadLoop waits for the old readLoop to exit, then spawns a new one. +// restartReadLoop waits for the current readLoop to exit, then starts a new one. func (m *Monitor) restartReadLoop(ctx context.Context) { <-m.done m.done = make(chan struct{}) go m.readLoop(ctx) } -// subscribeToUpstream reconnects with backoff on Chrome restarts, emitting -// monitor_disconnected / monitor_reconnected events. 
+// subscribeToUpstream reconnects with backoff on Chrome restarts, publishing disconnect/reconnect events. func (m *Monitor) subscribeToUpstream(ctx context.Context) { ch, cancel := m.upstreamMgr.Subscribe() defer cancel() @@ -303,7 +357,7 @@ func (m *Monitor) subscribeToUpstream(ctx context.Context) { reconnErr = err continue } - conn.SetReadLimit(8 * 1024 * 1024) + conn.SetReadLimit(wsReadLimit) m.connMu.Lock() m.conn = conn diff --git a/server/lib/cdpmonitor/types.go b/server/lib/cdpmonitor/types.go index f53e733b..c61c3335 100644 --- a/server/lib/cdpmonitor/types.go +++ b/server/lib/cdpmonitor/types.go @@ -39,7 +39,6 @@ type networkReqState struct { headers json.RawMessage postData string resourceType string - initiator json.RawMessage status int statusText string resHeaders json.RawMessage diff --git a/server/lib/cdpmonitor/util.go b/server/lib/cdpmonitor/util.go new file mode 100644 index 00000000..5c29fad9 --- /dev/null +++ b/server/lib/cdpmonitor/util.go @@ -0,0 +1,92 @@ +package cdpmonitor + +import ( + "slices" + "strings" + "unicode/utf8" +) + +// isTextualResource reports whether the resource warrants body capture. +// resourceType is checked first; mimeType is a fallback for resources with no type (e.g. in-flight at attach time). +func isTextualResource(resourceType, mimeType string) bool { + switch resourceType { + case "Font", "Image", "Media": + return false + } + return isCapturedMIME(mimeType) +} + +// isCapturedMIME returns true for MIME types whose bodies are worth capturing. +// Binary formats (vendor types, binary encodings, raw streams) are excluded. 
+func isCapturedMIME(mime string) bool { + if mime == "" { + return true // unknown, capture conservatively + } + for _, prefix := range []string{"image/", "font/", "audio/", "video/"} { + if strings.HasPrefix(mime, prefix) { + return false + } + } + if slices.Contains([]string{ + "application/octet-stream", + "application/wasm", + "application/pdf", + "application/zip", + "application/gzip", + "application/x-protobuf", + "application/x-msgpack", + "application/x-thrift", + }, mime) { + return false + } + // Skip vendor binary formats; allow vnd types with text-based suffixes (+json, +xml, +csv). + if sub, ok := strings.CutPrefix(mime, "application/vnd."); ok { + for _, textSuffix := range []string{"+json", "+xml", "+csv"} { + if strings.HasSuffix(sub, textSuffix) { + return true + } + } + return false + } + return true +} + +// bodyCapFor returns the max body capture size for a MIME type. +// Structured data (JSON, XML, form data) gets 900 KB; everything else gets 10 KB. +func bodyCapFor(mime string) int { + const fullCap = 900 * 1024 + const contextCap = 10 * 1024 + structuredPrefixes := []string{ + "application/json", + "application/xml", + "application/x-www-form-urlencoded", + "application/graphql", + "text/xml", + "text/csv", + } + for _, p := range structuredPrefixes { + if strings.HasPrefix(mime, p) { + return fullCap + } + } + // vnd types with +json/+xml suffix are treated as structured. + for _, suffix := range []string{"+json", "+xml"} { + if strings.HasSuffix(mime, suffix) { + return fullCap + } + } + return contextCap +} + +// truncateBody caps body at the given limit on a valid UTF-8 boundary. +func truncateBody(body string, maxBody int) string { + if len(body) <= maxBody { + return body + } + // Walk back at most UTFMax bytes to find a clean rune boundary. 
+ i := maxBody + for i > maxBody-utf8.UTFMax && !utf8.RuneStart(body[i]) { + i-- + } + return body[:i] +} From 306bc0e4236d7a4b6e40126e4c01fb36c9be99a7 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Thu, 2 Apr 2026 13:31:17 +0000 Subject: [PATCH 26/27] review --- server/lib/cdpmonitor/domains.go | 2 + server/lib/cdpmonitor/handlers.go | 61 +++++++++++++-------- server/lib/cdpmonitor/monitor.go | 83 ++++++++++++++++++++--------- server/lib/cdpmonitor/screenshot.go | 2 +- server/lib/cdpmonitor/types.go | 6 ++- server/lib/cdpmonitor/util.go | 21 +++++++- 6 files changed, 124 insertions(+), 51 deletions(-) diff --git a/server/lib/cdpmonitor/domains.go b/server/lib/cdpmonitor/domains.go index f32932c6..1e95e0b3 100644 --- a/server/lib/cdpmonitor/domains.go +++ b/server/lib/cdpmonitor/domains.go @@ -30,8 +30,10 @@ func (m *Monitor) enableDomains(ctx context.Context, sessionID string) { // injectedJS tracks clicks, keys, and scrolls via the __kernelEvent binding. // Layout shifts are handled natively by PerformanceTimeline.enable. const injectedJS = `(function() { + if (window.__kernelEventInjected) return; var send = window.__kernelEvent; if (!send) return; + window.__kernelEventInjected = true; function sel(el) { return el.id ? '#' + el.id : (el.className ? '.' + String(el.className).split(' ')[0] : ''); diff --git a/server/lib/cdpmonitor/handlers.go b/server/lib/cdpmonitor/handlers.go index 7450dc1c..35664993 100644 --- a/server/lib/cdpmonitor/handlers.go +++ b/server/lib/cdpmonitor/handlers.go @@ -8,7 +8,7 @@ import ( ) // publishEvent stamps common fields and publishes an event. 
-func (m *Monitor) publishEvent(eventType string, source events.Source, sourceEvent string, data json.RawMessage, sessionID string) { +func (m *Monitor) publishEvent(eventType string, detail events.DetailLevel, source events.Source, sourceEvent string, data json.RawMessage, sessionID string) { src := source src.Event = sourceEvent if sessionID != "" { @@ -17,12 +17,14 @@ func (m *Monitor) publishEvent(eventType string, source events.Source, sourceEve } src.Metadata["cdp_session_id"] = sessionID } + url, _ := m.currentURL.Load().(string) m.publish(events.Event{ Ts: time.Now().UnixMilli(), Type: eventType, Category: events.CategoryFor(eventType), Source: src, - DetailLevel: events.DetailStandard, + DetailLevel: detail, + URL: url, Data: data, }) } @@ -71,11 +73,11 @@ func (m *Monitor) handleConsole(params json.RawMessage, sessionID string) { text := "" if len(p.Args) > 0 { - text = p.Args[0].Value + text = consoleArgString(p.Args[0]) } argValues := make([]string, 0, len(p.Args)) for _, a := range p.Args { - argValues = append(argValues, a.Value) + argValues = append(argValues, consoleArgString(a)) } data, _ := json.Marshal(map[string]any{ "level": p.Type, @@ -83,7 +85,7 @@ func (m *Monitor) handleConsole(params json.RawMessage, sessionID string) { "args": argValues, "stack_trace": p.StackTrace, }) - m.publishEvent("console_log", events.Source{Kind: events.KindCDP}, "Runtime.consoleAPICalled", data, sessionID) + m.publishEvent("console_log", events.DetailStandard, events.Source{Kind: events.KindCDP}, "Runtime.consoleAPICalled", data, sessionID) } func (m *Monitor) handleExceptionThrown(params json.RawMessage, sessionID string) { @@ -98,8 +100,8 @@ func (m *Monitor) handleExceptionThrown(params json.RawMessage, sessionID string "url": p.ExceptionDetails.URL, "stack_trace": p.ExceptionDetails.StackTrace, }) - m.publishEvent("console_error", events.Source{Kind: events.KindCDP}, "Runtime.exceptionThrown", data, sessionID) - go m.maybeScreenshot(m.lifecycleCtx) + 
m.publishEvent("console_error", events.DetailStandard, events.Source{Kind: events.KindCDP}, "Runtime.exceptionThrown", data, sessionID) + go m.maybeScreenshot(m.getLifecycleCtx()) } // handleBindingCalled processes __kernelEvent binding calls from the page. @@ -123,7 +125,7 @@ func (m *Monitor) handleBindingCalled(params json.RawMessage, sessionID string) } switch header.Type { case "interaction_click", "interaction_key", "scroll_settled": - m.publishEvent(header.Type, events.Source{Kind: events.KindCDP}, "Runtime.bindingCalled", payload, sessionID) + m.publishEvent(header.Type, events.DetailStandard, events.Source{Kind: events.KindCDP}, "Runtime.bindingCalled", payload, sessionID) } } @@ -138,7 +140,7 @@ func (m *Monitor) handleTimelineEvent(params json.RawMessage, sessionID string) if err := json.Unmarshal(params, &p); err != nil || p.Event.Type != "layout-shift" { return } - m.publishEvent("layout_shift", events.Source{Kind: events.KindCDP}, "PerformanceTimeline.timelineEventAdded", params, sessionID) + m.publishEvent("layout_shift", events.DetailStandard, events.Source{Kind: events.KindCDP}, "PerformanceTimeline.timelineEventAdded", params, sessionID) m.computed.onLayoutShift() } @@ -158,6 +160,7 @@ func (m *Monitor) handleNetworkRequest(params json.RawMessage, sessionID string) m.pendReqMu.Lock() m.pendingRequests[p.RequestID] = networkReqState{ + sessionID: sessionID, method: p.Request.Method, url: p.Request.URL, headers: p.Request.Headers, @@ -173,7 +176,7 @@ func (m *Monitor) handleNetworkRequest(params json.RawMessage, sessionID string) "resource_type": p.ResourceType, "initiator_type": initiatorType, }) - m.publishEvent("network_request", events.Source{Kind: events.KindCDP}, "Network.requestWillBeSent", data, sessionID) + m.publishEvent("network_request", events.DetailStandard, events.Source{Kind: events.KindCDP}, "Network.requestWillBeSent", data, sessionID) m.computed.onRequest() } @@ -211,7 +214,7 @@ func (m *Monitor) handleLoadingFinished(params 
json.RawMessage, sessionID string } // Fetch response body async to avoid blocking readLoop; binary types are skipped. go func() { - ctx := m.lifecycleCtx + ctx := m.getLifecycleCtx() body := "" if isTextualResource(state.resourceType, state.mimeType) { result, err := m.send(ctx, "Network.getResponseBody", map[string]any{ @@ -237,7 +240,11 @@ func (m *Monitor) handleLoadingFinished(params json.RawMessage, sessionID string "resource_type": state.resourceType, "body": body, }) - m.publishEvent("network_response", events.Source{Kind: events.KindCDP}, "Network.loadingFinished", data, sessionID) + detail := events.DetailStandard + if body != "" { + detail = events.DetailVerbose + } + m.publishEvent("network_response", detail, events.Source{Kind: events.KindCDP}, "Network.loadingFinished", data, sessionID) m.computed.onLoadingFinished() }() } @@ -266,7 +273,7 @@ func (m *Monitor) handleLoadingFailed(params json.RawMessage, sessionID string) ev["url"] = state.url } data, _ := json.Marshal(ev) - m.publishEvent("network_loading_failed", events.Source{Kind: events.KindCDP}, "Network.loadingFailed", data, sessionID) + m.publishEvent("network_loading_failed", events.DetailStandard, events.Source{Kind: events.KindCDP}, "Network.loadingFailed", data, sessionID) m.computed.onLoadingFinished() } @@ -287,28 +294,36 @@ func (m *Monitor) handleFrameNavigated(params json.RawMessage, sessionID string) "frame_id": p.Frame.ID, "parent_frame_id": p.Frame.ParentID, }) - m.publishEvent("navigation", events.Source{Kind: events.KindCDP}, "Page.frameNavigated", data, sessionID) + // Only track top-level frame navigations (no parent). 
+ if p.Frame.ParentID == "" { + m.currentURL.Store(p.Frame.URL) + } + m.publishEvent("navigation", events.DetailStandard, events.Source{Kind: events.KindCDP}, "Page.frameNavigated", data, sessionID) m.pendReqMu.Lock() - clear(m.pendingRequests) + for id, req := range m.pendingRequests { + if req.sessionID == sessionID { + delete(m.pendingRequests, id) + } + } m.pendReqMu.Unlock() m.computed.resetOnNavigation() } func (m *Monitor) handleDOMContentLoaded(params json.RawMessage, sessionID string) { - m.publishEvent("dom_content_loaded", events.Source{Kind: events.KindCDP}, "Page.domContentEventFired", params, sessionID) + m.publishEvent("dom_content_loaded", events.DetailMinimal, events.Source{Kind: events.KindCDP}, "Page.domContentEventFired", params, sessionID) m.computed.onDOMContentLoaded() } func (m *Monitor) handleLoadEventFired(params json.RawMessage, sessionID string) { - m.publishEvent("page_load", events.Source{Kind: events.KindCDP}, "Page.loadEventFired", params, sessionID) + m.publishEvent("page_load", events.DetailMinimal, events.Source{Kind: events.KindCDP}, "Page.loadEventFired", params, sessionID) m.computed.onPageLoad() - go m.maybeScreenshot(m.lifecycleCtx) + go m.maybeScreenshot(m.getLifecycleCtx()) } func (m *Monitor) handleDOMUpdated(params json.RawMessage, sessionID string) { - m.publishEvent("dom_updated", events.Source{Kind: events.KindCDP}, "DOM.documentUpdated", params, sessionID) + m.publishEvent("dom_updated", events.DetailMinimal, events.Source{Kind: events.KindCDP}, "DOM.documentUpdated", params, sessionID) } // handleAttachedToTarget stores the new session then enables domains and injects script. @@ -327,8 +342,8 @@ func (m *Monitor) handleAttachedToTarget(msg cdpMessage) { // Async to avoid blocking the readLoop. 
go func() { - m.enableDomains(m.lifecycleCtx, params.SessionID) - _ = m.injectScript(m.lifecycleCtx, params.SessionID) + m.enableDomains(m.getLifecycleCtx(), params.SessionID) + _ = m.injectScript(m.getLifecycleCtx(), params.SessionID) }() } @@ -342,7 +357,7 @@ func (m *Monitor) handleTargetCreated(params json.RawMessage, sessionID string) "target_type": p.TargetInfo.Type, "url": p.TargetInfo.URL, }) - m.publishEvent("target_created", events.Source{Kind: events.KindCDP}, "Target.targetCreated", data, sessionID) + m.publishEvent("target_created", events.DetailMinimal, events.Source{Kind: events.KindCDP}, "Target.targetCreated", data, sessionID) } func (m *Monitor) handleTargetDestroyed(params json.RawMessage, sessionID string) { @@ -355,5 +370,5 @@ func (m *Monitor) handleTargetDestroyed(params json.RawMessage, sessionID string data, _ := json.Marshal(map[string]any{ "target_id": p.TargetID, }) - m.publishEvent("target_destroyed", events.Source{Kind: events.KindCDP}, "Target.targetDestroyed", data, sessionID) + m.publishEvent("target_destroyed", events.DetailMinimal, events.Source{Kind: events.KindCDP}, "Target.targetDestroyed", data, sessionID) } diff --git a/server/lib/cdpmonitor/monitor.go b/server/lib/cdpmonitor/monitor.go index 3151375d..4422c8a4 100644 --- a/server/lib/cdpmonitor/monitor.go +++ b/server/lib/cdpmonitor/monitor.go @@ -24,19 +24,19 @@ type PublishFunc func(ev events.Event) const wsReadLimit = 8 * 1024 * 1024 // Monitor manages a CDP WebSocket connection with auto-attach session fan-out. -// Reusable: Stop followed by Start reconnects cleanly. All exported methods are -// safe to call concurrently. Stop blocks until the read goroutine exits. type Monitor struct { upstreamMgr UpstreamProvider publish PublishFunc displayNum int + // lifeMu serializes Start, Stop, and restartReadLoop to prevent races on + // conn, lifecycleCtx, cancel, and done. 
+ lifeMu sync.Mutex conn *websocket.Conn - connMu sync.Mutex - nextID atomic.Int64 - pendMu sync.Mutex - pending map[int64]chan cdpMessage + nextID atomic.Int64 + pendMu sync.Mutex + pending map[int64]chan cdpMessage sessionsMu sync.RWMutex sessions map[string]targetInfo // sessionID → targetInfo @@ -44,6 +44,8 @@ type Monitor struct { pendReqMu sync.Mutex pendingRequests map[string]networkReqState // requestId → networkReqState + currentURL atomic.Value // last URL from Page.frameNavigated + computed *computedState lastScreenshotAt atomic.Int64 // unix millis of last capture @@ -76,6 +78,14 @@ func (m *Monitor) IsRunning() bool { return m.running.Load() } +// getLifecycleCtx returns the current lifecycle context under lifeMu. +func (m *Monitor) getLifecycleCtx() context.Context { + m.lifeMu.Lock() + ctx := m.lifecycleCtx + m.lifeMu.Unlock() + return ctx +} + // Start begins CDP capture. Restarts if already running. func (m *Monitor) Start(parentCtx context.Context) error { if m.running.Load() { @@ -97,13 +107,12 @@ func (m *Monitor) Start(parentCtx context.Context) error { } conn.SetReadLimit(wsReadLimit) - m.connMu.Lock() + m.lifeMu.Lock() m.conn = conn - m.connMu.Unlock() - m.lifecycleCtx = ctx m.cancel = cancel m.done = make(chan struct{}) + m.lifeMu.Unlock() m.running.Store(true) @@ -119,18 +128,31 @@ func (m *Monitor) Stop() { if !m.running.Swap(false) { return } + + m.lifeMu.Lock() if m.cancel != nil { m.cancel() } - if m.done != nil { - <-m.done + done := m.done + m.lifeMu.Unlock() + + if done != nil { + <-done } - m.connMu.Lock() + + m.lifeMu.Lock() if m.conn != nil { _ = m.conn.Close(websocket.StatusNormalClosure, "stopped") m.conn = nil } - m.connMu.Unlock() + m.lifeMu.Unlock() + + m.clearState() +} + +// clearState resets sessions, pending requests, and computed state. 
+func (m *Monitor) clearState() { + m.currentURL.Store("") m.sessionsMu.Lock() m.sessions = make(map[string]targetInfo) @@ -145,11 +167,12 @@ func (m *Monitor) Stop() { // readLoop reads CDP messages, routing responses to pending callers and dispatching events. func (m *Monitor) readLoop(ctx context.Context) { - defer close(m.done) - - m.connMu.Lock() + m.lifeMu.Lock() + done := m.done conn := m.conn - m.connMu.Unlock() + m.lifeMu.Unlock() + defer close(done) + if conn == nil { return } @@ -211,13 +234,14 @@ func (m *Monitor) send(ctx context.Context, method string, params any, sessionID m.pendMu.Unlock() }() - m.connMu.Lock() + m.lifeMu.Lock() conn := m.conn - m.connMu.Unlock() + m.lifeMu.Unlock() if conn == nil { return nil, fmt.Errorf("cdpmonitor: connection not open") } + // coder/websocket allows concurrent Read + Write on the same Conn. if err := conn.Write(ctx, websocket.MessageText, reqBytes); err != nil { return nil, fmt.Errorf("write: %w", err) } @@ -296,8 +320,16 @@ func (m *Monitor) attachExistingTargets(ctx context.Context) { // restartReadLoop waits for the current readLoop to exit, then starts a new one. func (m *Monitor) restartReadLoop(ctx context.Context) { - <-m.done + m.lifeMu.Lock() + done := m.done + m.lifeMu.Unlock() + + <-done + + m.lifeMu.Lock() m.done = make(chan struct{}) + m.lifeMu.Unlock() + go m.readLoop(ctx) } @@ -332,12 +364,15 @@ func (m *Monitor) subscribeToUpstream(ctx context.Context) { startReconnect := time.Now() - m.connMu.Lock() + m.lifeMu.Lock() if m.conn != nil { _ = m.conn.Close(websocket.StatusNormalClosure, "reconnecting") m.conn = nil } - m.connMu.Unlock() + m.lifeMu.Unlock() + + // Clear stale state from the previous Chrome instance. 
+ m.clearState() var reconnErr error for attempt := range 10 { @@ -359,9 +394,9 @@ func (m *Monitor) subscribeToUpstream(ctx context.Context) { } conn.SetReadLimit(wsReadLimit) - m.connMu.Lock() + m.lifeMu.Lock() m.conn = conn - m.connMu.Unlock() + m.lifeMu.Unlock() reconnErr = nil break diff --git a/server/lib/cdpmonitor/screenshot.go b/server/lib/cdpmonitor/screenshot.go index 54b7b985..abb559d2 100644 --- a/server/lib/cdpmonitor/screenshot.go +++ b/server/lib/cdpmonitor/screenshot.go @@ -52,7 +52,7 @@ func (m *Monitor) captureScreenshot(ctx context.Context) { } encoded := base64.StdEncoding.EncodeToString(pngBytes) - data := json.RawMessage(fmt.Sprintf(`{"png":%q}`, encoded)) + data, _ := json.Marshal(map[string]string{"png": encoded}) m.publish(events.Event{ Ts: time.Now().UnixMilli(), diff --git a/server/lib/cdpmonitor/types.go b/server/lib/cdpmonitor/types.go index c61c3335..9beab2bf 100644 --- a/server/lib/cdpmonitor/types.go +++ b/server/lib/cdpmonitor/types.go @@ -34,6 +34,7 @@ type cdpMessage struct { // networkReqState holds request + response metadata until loadingFinished. type networkReqState struct { + sessionID string method string url string headers json.RawMessage @@ -46,9 +47,10 @@ type networkReqState struct { } // cdpConsoleArg is a single Runtime.consoleAPICalled argument. +// Value is json.RawMessage because CDP sends strings, numbers, objects, etc. type cdpConsoleArg struct { - Type string `json:"type"` - Value string `json:"value"` + Type string `json:"type"` + Value json.RawMessage `json:"value,omitempty"` } // cdpConsoleParams is the shape of Runtime.consoleAPICalled params. diff --git a/server/lib/cdpmonitor/util.go b/server/lib/cdpmonitor/util.go index 5c29fad9..5dae2fce 100644 --- a/server/lib/cdpmonitor/util.go +++ b/server/lib/cdpmonitor/util.go @@ -1,11 +1,27 @@ package cdpmonitor import ( + "encoding/json" "slices" "strings" "unicode/utf8" ) +// consoleArgString extracts a display string from a CDP console argument. 
+// For strings it unquotes the JSON value; for other types it returns the raw JSON. +func consoleArgString(a cdpConsoleArg) string { + if len(a.Value) == 0 { + return a.Type // e.g. "undefined", "null" + } + if a.Type == "string" { + var s string + if json.Unmarshal(a.Value, &s) == nil { + return s + } + } + return string(a.Value) +} + // isTextualResource reports whether the resource warrants body capture. // resourceType is checked first; mimeType is a fallback for resources with no type (e.g. in-flight at attach time). func isTextualResource(resourceType, mimeType string) bool { @@ -20,7 +36,7 @@ func isTextualResource(resourceType, mimeType string) bool { // Binary formats (vendor types, binary encodings, raw streams) are excluded. func isCapturedMIME(mime string) bool { if mime == "" { - return true // unknown, capture conservatively + return false // unknown } for _, prefix := range []string{"image/", "font/", "audio/", "video/"} { if strings.HasPrefix(mime, prefix) { @@ -83,6 +99,9 @@ func truncateBody(body string, maxBody int) string { if len(body) <= maxBody { return body } + if maxBody <= utf8.UTFMax { + return body[:maxBody] + } // Walk back at most UTFMax bytes to find a clean rune boundary. 
i := maxBody for i > maxBody-utf8.UTFMax && !utf8.RuneStart(body[i]) { From a775160c0c7fcc77f61b2baa893872ef25c62a46 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Thu, 2 Apr 2026 14:02:47 +0000 Subject: [PATCH 27/27] review: update test --- server/lib/cdpmonitor/monitor_test.go | 1414 ++++++++++++------------- 1 file changed, 651 insertions(+), 763 deletions(-) diff --git a/server/lib/cdpmonitor/monitor_test.go b/server/lib/cdpmonitor/monitor_test.go index d16104f1..8f793340 100644 --- a/server/lib/cdpmonitor/monitor_test.go +++ b/server/lib/cdpmonitor/monitor_test.go @@ -18,20 +18,27 @@ import ( "github.com/stretchr/testify/require" ) +// --------------------------------------------------------------------------- +// Test infrastructure +// --------------------------------------------------------------------------- + // fakeCDPServer is a minimal WebSocket server that accepts connections and // lets the test drive scripted message sequences. type fakeCDPServer struct { srv *httptest.Server conn *websocket.Conn connMu sync.Mutex - msgCh chan []byte // inbound messages from Monitor + connCh chan struct{} // closed when the first connection is accepted + msgCh chan []byte // inbound messages from Monitor } func newFakeCDPServer(t *testing.T) *fakeCDPServer { t.Helper() f := &fakeCDPServer{ - msgCh: make(chan []byte, 128), + msgCh: make(chan []byte, 128), + connCh: make(chan struct{}), } + var connOnce sync.Once f.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { c, err := websocket.Accept(w, r, &websocket.AcceptOptions{InsecureSkipVerify: true}) if err != nil { @@ -40,7 +47,7 @@ func newFakeCDPServer(t *testing.T) *fakeCDPServer { f.connMu.Lock() f.conn = c f.connMu.Unlock() - // drain messages from Monitor into msgCh until connection closes + connOnce.Do(func() { close(f.connCh) }) go func() { for { _, b, err := c.Read(context.Background()) @@ -54,23 +61,19 @@ func newFakeCDPServer(t *testing.T) *fakeCDPServer { return f } 
-// wsURL returns a ws:// URL pointing at the fake server. func (f *fakeCDPServer) wsURL() string { return "ws" + strings.TrimPrefix(f.srv.URL, "http") } -// sendToMonitor pushes a raw JSON message to the Monitor's readLoop. func (f *fakeCDPServer) sendToMonitor(t *testing.T, msg any) { t.Helper() f.connMu.Lock() c := f.conn f.connMu.Unlock() require.NotNil(t, c, "no active connection") - err := wsjson.Write(context.Background(), c, msg) - require.NoError(t, err) + require.NoError(t, wsjson.Write(context.Background(), c, msg)) } -// readFromMonitor blocks until the Monitor sends a message (with timeout). func (f *fakeCDPServer) readFromMonitor(t *testing.T, timeout time.Duration) cdpMessage { t.Helper() select { @@ -129,7 +132,6 @@ func (f *fakeUpstream) Subscribe() (<-chan string, func()) { return ch, cancel } -// notifyRestart simulates Chrome restarting with a new DevTools URL. func (f *fakeUpstream) notifyRestart(newURL string) { f.mu.Lock() f.current = newURL @@ -144,57 +146,217 @@ func (f *fakeUpstream) notifyRestart(newURL string) { } } -// --- Tests --- +// eventCollector captures published events with channel-based notification. +type eventCollector struct { + mu sync.Mutex + events []events.Event + notify chan struct{} // signaled on every publish +} -// TestMonitorStart verifies that Monitor.Start() dials the URL from -// UpstreamProvider.Current() and establishes an isolated WebSocket connection. -func TestMonitorStart(t *testing.T) { - srv := newFakeCDPServer(t) - defer srv.close() +func newEventCollector() *eventCollector { + return &eventCollector{notify: make(chan struct{}, 256)} +} + +func (c *eventCollector) publishFn() PublishFunc { + return func(ev events.Event) { + c.mu.Lock() + c.events = append(c.events, ev) + c.mu.Unlock() + select { + case c.notify <- struct{}{}: + default: + } + } +} + +// waitFor blocks until an event of the given type is published, or fails. 
+func (c *eventCollector) waitFor(t *testing.T, eventType string, timeout time.Duration) events.Event { + t.Helper() + deadline := time.After(timeout) + for { + c.mu.Lock() + for _, ev := range c.events { + if ev.Type == eventType { + c.mu.Unlock() + return ev + } + } + c.mu.Unlock() + select { + case <-c.notify: + case <-deadline: + t.Fatalf("timeout waiting for event type=%q", eventType) + return events.Event{} + } + } +} + +// waitForNew blocks until a NEW event of the given type is published after this +// call, ignoring any events already in the collector. +func (c *eventCollector) waitForNew(t *testing.T, eventType string, timeout time.Duration) events.Event { + t.Helper() + c.mu.Lock() + skip := len(c.events) + c.mu.Unlock() + + deadline := time.After(timeout) + for { + c.mu.Lock() + for i := skip; i < len(c.events); i++ { + if c.events[i].Type == eventType { + ev := c.events[i] + c.mu.Unlock() + return ev + } + } + c.mu.Unlock() + select { + case <-c.notify: + case <-deadline: + t.Fatalf("timeout waiting for new event type=%q", eventType) + return events.Event{} + } + } +} + +// assertNone verifies that no event of the given type arrives within d. +func (c *eventCollector) assertNone(t *testing.T, eventType string, d time.Duration) { + t.Helper() + deadline := time.After(d) + for { + select { + case <-c.notify: + c.mu.Lock() + for _, ev := range c.events { + if ev.Type == eventType { + c.mu.Unlock() + t.Fatalf("unexpected event %q published", eventType) + return + } + } + c.mu.Unlock() + case <-deadline: + return + } + } +} + + +// ResponderFunc is called for each CDP command the Monitor sends. +// Return nil to use the default empty result. +type ResponderFunc func(msg cdpMessage) any + +// listenAndRespond drains srv.msgCh, calls fn for each command, and sends the +// response. If fn is nil or returns nil, sends {"id": msg.ID, "result": {}}. 
+func listenAndRespond(srv *fakeCDPServer, stopCh <-chan struct{}, fn ResponderFunc) { + for { + select { + case b := <-srv.msgCh: + var msg cdpMessage + if json.Unmarshal(b, &msg) != nil || msg.ID == 0 { + continue + } + srv.connMu.Lock() + c := srv.conn + srv.connMu.Unlock() + if c == nil { + continue + } + var resp any + if fn != nil { + resp = fn(msg) + } + if resp == nil { + resp = map[string]any{"id": msg.ID, "result": map[string]any{}} + } + _ = wsjson.Write(context.Background(), c, resp) + case <-stopCh: + return + } + } +} +// startMonitor creates a Monitor against srv, starts it, waits for the +// connection, and launches a responder goroutine. Returns cleanup func. +func startMonitor(t *testing.T, srv *fakeCDPServer, fn ResponderFunc) (*Monitor, *eventCollector, func()) { + t.Helper() + ec := newEventCollector() upstream := newFakeUpstream(srv.wsURL()) - var published []events.Event - var publishMu sync.Mutex - publishFn := func(ev events.Event) { - publishMu.Lock() - published = append(published, ev) - publishMu.Unlock() + m := New(upstream, ec.publishFn(), 99) + require.NoError(t, m.Start(context.Background())) + + stopResponder := make(chan struct{}) + go listenAndRespond(srv, stopResponder, fn) + + // Wait for the websocket connection to be established. + select { + case <-srv.connCh: + case <-time.After(3 * time.Second): + t.Fatal("fake server never received a connection") } + // Wait for the init sequence (setAutoAttach + domain enables + script injection + // + getTargets) to complete. The responder goroutine handles all responses; + // we just need to wait for the burst to finish. 
+	waitForInitDone(t, srv)
 
-	m := New(upstream, publishFn, 99)
+	cleanup := func() {
+		close(stopResponder)
+		m.Stop()
+	}
+	return m, ec, cleanup
+}
 
-	ctx := context.Background()
-	err := m.Start(ctx)
-	require.NoError(t, err)
-	defer m.Stop()
+// waitForInitDone waits a fixed 100ms grace period for the Monitor's init
+// burst (setAutoAttach + domain enables + script injection + getTargets) to
+// be handled by the responder goroutine. NOTE(review): this does not detect
+// a quiet period — the inner select always returns on the 100ms timer, so
+// the 5s deadline branch is unreachable; it is a fixed delay.
+func waitForInitDone(t *testing.T, _ *fakeCDPServer) {
+	t.Helper()
+	// The init sequence sends ~8 commands; allow a fixed 100ms for the
+	// responder to process them.
+	deadline := time.After(5 * time.Second)
+	for {
+		select {
+		case <-time.After(100 * time.Millisecond):
+			return
+		case <-deadline:
+			t.Fatal("init sequence did not complete")
+		}
+	}
+}
 
-	// Give readLoop time to start and send the setAutoAttach command.
-	// We just verify the connection was made and the Monitor is running.
-	assert.True(t, m.IsRunning())
+// newComputedMonitor creates an unconnected Monitor for testing computed state
+// (network_idle, layout_settled, navigation_settled) without a real websocket.
+func newComputedMonitor(t *testing.T) (*Monitor, *eventCollector) {
+	t.Helper()
+	ec := newEventCollector()
+	upstream := newFakeUpstream("ws://127.0.0.1:0")
+	m := New(upstream, ec.publishFn(), 0)
+	return m, ec
+}
 
-	// Read the first message sent by the Monitor — it should be Target.setAutoAttach.
-	msg := srv.readFromMonitor(t, 3*time.Second)
-	assert.Equal(t, "Target.setAutoAttach", msg.Method)
+// navigateMonitor sends a Page.frameNavigated to reset computed state.
+func navigateMonitor(m *Monitor, url string) { + p, _ := json.Marshal(map[string]any{ + "frame": map[string]any{"id": "f1", "url": url}, + }) + m.handleFrameNavigated(p, "s1") } -// TestAutoAttach verifies that after Start(), the Monitor sends -// Target.setAutoAttach{autoAttach:true, waitForDebuggerOnStart:false, flatten:true} -// and that on receiving Target.attachedToTarget the session is stored. +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + func TestAutoAttach(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() + ec := newEventCollector() upstream := newFakeUpstream(srv.wsURL()) - publishFn := func(ev events.Event) {} - - m := New(upstream, publishFn, 99) - - ctx := context.Background() - err := m.Start(ctx) - require.NoError(t, err) + m := New(upstream, ec.publishFn(), 99) + require.NoError(t, m.Start(context.Background())) defer m.Stop() - // Read the setAutoAttach request from the Monitor. + // The first command should be Target.setAutoAttach with correct params. msg := srv.readFromMonitor(t, 3*time.Second) assert.Equal(t, "Target.setAutoAttach", msg.Method) @@ -208,654 +370,434 @@ func TestAutoAttach(t *testing.T) { assert.False(t, params.WaitForDebuggerOnStart) assert.True(t, params.Flatten) - // Acknowledge the command with a response. - srv.sendToMonitor(t, map[string]any{ - "id": msg.ID, - "result": map[string]any{}, - }) - - // Drain any domain-enable commands sent after setAutoAttach. - // The Monitor calls enableDomains (Runtime.enable, Network.enable, Page.enable, DOM.enable). 
- drainTimeout := time.NewTimer(500 * time.Millisecond) - for { - select { - case b := <-srv.msgCh: - var m2 cdpMessage - _ = json.Unmarshal(b, &m2) - // respond to enable commands - srv.connMu.Lock() - c := srv.conn - srv.connMu.Unlock() - if c != nil && m2.ID != 0 { - _ = wsjson.Write(context.Background(), c, map[string]any{ - "id": m2.ID, - "result": map[string]any{}, - }) - } - case <-drainTimeout.C: - goto afterDrain - } - } -afterDrain: + // Respond and drain domain-enable commands. + stopResponder := make(chan struct{}) + go listenAndRespond(srv, stopResponder, nil) + defer close(stopResponder) + srv.sendToMonitor(t, map[string]any{"id": msg.ID, "result": map[string]any{}}) - // Now simulate Target.attachedToTarget event. - const testSessionID = "session-abc-123" - const testTargetID = "target-xyz-456" + // Simulate Target.attachedToTarget — session should be stored. srv.sendToMonitor(t, map[string]any{ "method": "Target.attachedToTarget", "params": map[string]any{ - "sessionId": testSessionID, - "targetInfo": map[string]any{ - "targetId": testTargetID, - "type": "page", - "url": "https://example.com", - }, + "sessionId": "session-abc", + "targetInfo": map[string]any{"targetId": "target-xyz", "type": "page", "url": "https://example.com"}, }, }) - - // Give the Monitor time to process the event and store the session. require.Eventually(t, func() bool { m.sessionsMu.RLock() defer m.sessionsMu.RUnlock() - _, ok := m.sessions[testSessionID] + _, ok := m.sessions["session-abc"] return ok - }, 2*time.Second, 50*time.Millisecond, "session not stored after attachedToTarget") + }, 2*time.Second, 50*time.Millisecond, "session not stored") m.sessionsMu.RLock() - info := m.sessions[testSessionID] + info := m.sessions["session-abc"] m.sessionsMu.RUnlock() - assert.Equal(t, testTargetID, info.targetID) + assert.Equal(t, "target-xyz", info.targetID) assert.Equal(t, "page", info.targetType) } -// TestLifecycle verifies the idle→running→stopped→restart state machine. 
func TestLifecycle(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() + ec := newEventCollector() upstream := newFakeUpstream(srv.wsURL()) - publishFn := func(ev events.Event) {} - - m := New(upstream, publishFn, 99) - - // Idle at boot. - assert.False(t, m.IsRunning(), "should be idle at boot") + m := New(upstream, ec.publishFn(), 99) - ctx := context.Background() + assert.False(t, m.IsRunning(), "idle at boot") - // First Start. - err := m.Start(ctx) - require.NoError(t, err) - assert.True(t, m.IsRunning(), "should be running after Start") - - // Drain the setAutoAttach message. - select { - case <-srv.msgCh: - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for setAutoAttach") - } + require.NoError(t, m.Start(context.Background())) + assert.True(t, m.IsRunning(), "running after Start") + srv.readFromMonitor(t, 2*time.Second) // drain setAutoAttach - // Stop. m.Stop() - assert.False(t, m.IsRunning(), "should be stopped after Stop") + assert.False(t, m.IsRunning(), "stopped after Stop") - // Second Start while stopped — should start fresh. - err = m.Start(ctx) - require.NoError(t, err) - assert.True(t, m.IsRunning(), "should be running after second Start") + // Restart while stopped. + require.NoError(t, m.Start(context.Background())) + assert.True(t, m.IsRunning(), "running after second Start") + srv.readFromMonitor(t, 2*time.Second) - // Drain the setAutoAttach message for the second start. - select { - case <-srv.msgCh: - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for setAutoAttach on second start") - } - - // Second Start while already running — stop+restart. - err = m.Start(ctx) - require.NoError(t, err) - assert.True(t, m.IsRunning(), "should be running after stop+restart") + // Restart while running — implicit Stop+Start. 
+ require.NoError(t, m.Start(context.Background())) + assert.True(t, m.IsRunning(), "running after implicit restart") m.Stop() - assert.False(t, m.IsRunning(), "should be stopped at end") + assert.False(t, m.IsRunning(), "stopped at end") } -// TestReconnect verifies that when UpstreamManager emits a new URL (Chrome restart), -// the monitor emits monitor_disconnected, reconnects, and emits monitor_reconnected. func TestReconnect(t *testing.T) { srv1 := newFakeCDPServer(t) upstream := newFakeUpstream(srv1.wsURL()) - - var published []events.Event - var publishMu sync.Mutex - var publishCount atomic.Int32 - publishFn := func(ev events.Event) { - publishMu.Lock() - published = append(published, ev) - publishMu.Unlock() - publishCount.Add(1) - } - - m := New(upstream, publishFn, 99) - - ctx := context.Background() - err := m.Start(ctx) - require.NoError(t, err) + ec := newEventCollector() + m := New(upstream, ec.publishFn(), 99) + require.NoError(t, m.Start(context.Background())) defer m.Stop() - // Drain setAutoAttach from srv1. - select { - case <-srv1.msgCh: - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for initial setAutoAttach") - } + srv1.readFromMonitor(t, 2*time.Second) // drain setAutoAttach - // Set up srv2 as the new Chrome URL. srv2 := newFakeCDPServer(t) defer srv2.close() defer srv1.close() - // Trigger Chrome restart notification. upstream.notifyRestart(srv2.wsURL()) - // Wait for monitor_disconnected event. - require.Eventually(t, func() bool { - publishMu.Lock() - defer publishMu.Unlock() - for _, ev := range published { - if ev.Type == "monitor_disconnected" { - return true - } - } - return false - }, 3*time.Second, 50*time.Millisecond, "monitor_disconnected not published") + ec.waitFor(t, "monitor_disconnected", 3*time.Second) - // Wait for the Monitor to connect to srv2 and send setAutoAttach. 
- select { - case <-srv2.msgCh: - // setAutoAttach received on srv2 - case <-time.After(5*time.Second): - t.Fatal("timeout waiting for setAutoAttach on srv2 after reconnect") - } + // Wait for the Monitor to reconnect to srv2. + srv2.readFromMonitor(t, 5*time.Second) - // Wait for monitor_reconnected event. - require.Eventually(t, func() bool { - publishMu.Lock() - defer publishMu.Unlock() - for _, ev := range published { - if ev.Type == "monitor_reconnected" { - return true - } - } - return false - }, 3*time.Second, 50*time.Millisecond, "monitor_reconnected not published") - - // Verify monitor_reconnected contains reconnect_duration_ms. - publishMu.Lock() - var reconnEv events.Event - for _, ev := range published { - if ev.Type == "monitor_reconnected" { - reconnEv = ev - break - } - } - publishMu.Unlock() - - require.NotEmpty(t, reconnEv.Type) + ev := ec.waitFor(t, "monitor_reconnected", 3*time.Second) var data map[string]any - require.NoError(t, json.Unmarshal(reconnEv.Data, &data)) - _, hasField := data["reconnect_duration_ms"] - assert.True(t, hasField, "monitor_reconnected missing reconnect_duration_ms field") -} - -// listenAndRespondAll drains srv.msgCh and responds with empty results until stopCh is closed. -func listenAndRespondAll(srv *fakeCDPServer, stopCh <-chan struct{}) { - for { - select { - case b := <-srv.msgCh: - var msg cdpMessage - if err := json.Unmarshal(b, &msg); err != nil { - continue - } - if msg.ID == 0 { - continue - } - srv.connMu.Lock() - c := srv.conn - srv.connMu.Unlock() - if c != nil { - _ = wsjson.Write(context.Background(), c, map[string]any{ - "id": msg.ID, - "result": map[string]any{}, - }) - } - case <-stopCh: - return - } - } -} - - -// startMonitorWithFakeServer is a helper that starts a monitor against a fake CDP server, -// drains the initial setAutoAttach + domain-enable commands, and returns a cleanup func. 
-func startMonitorWithFakeServer(t *testing.T, srv *fakeCDPServer) (*Monitor, *[]events.Event, *sync.Mutex, func()) { - t.Helper() - published := make([]events.Event, 0, 32) - var mu sync.Mutex - publishFn := func(ev events.Event) { - mu.Lock() - published = append(published, ev) - mu.Unlock() - } - upstream := newFakeUpstream(srv.wsURL()) - m := New(upstream, publishFn, 99) - ctx := context.Background() - require.NoError(t, m.Start(ctx)) - - stopResponder := make(chan struct{}) - go listenAndRespondAll(srv, stopResponder) - - cleanup := func() { - close(stopResponder) - m.Stop() - } - // Wait until the fake server has an active connection. - require.Eventually(t, func() bool { - srv.connMu.Lock() - defer srv.connMu.Unlock() - return srv.conn != nil - }, 3*time.Second, 20*time.Millisecond, "fake server never received a connection") - // Allow the readLoop and init commands to settle before sending test events. - time.Sleep(150 * time.Millisecond) - return m, &published, &mu, cleanup -} - -// waitForEvent blocks until an event of the given type is published, or times out. -func waitForEvent(t *testing.T, published *[]events.Event, mu *sync.Mutex, eventType string, timeout time.Duration) events.Event { - t.Helper() - deadline := time.Now().Add(timeout) - for time.Now().Before(deadline) { - mu.Lock() - for _, ev := range *published { - if ev.Type == eventType { - mu.Unlock() - return ev - } - } - mu.Unlock() - time.Sleep(20 * time.Millisecond) - } - t.Fatalf("timeout waiting for event type=%q", eventType) - return events.Event{} + require.NoError(t, json.Unmarshal(ev.Data, &data)) + _, ok := data["reconnect_duration_ms"] + assert.True(t, ok, "missing reconnect_duration_ms") } - -// TestConsoleEvents verifies console_log, console_error, and [KERNEL_EVENT] sentinel routing. 
func TestConsoleEvents(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + _, ec, cleanup := startMonitor(t, srv, nil) defer cleanup() - // 1. consoleAPICalled → console_log - srv.sendToMonitor(t, map[string]any{ - "method": "Runtime.consoleAPICalled", - "params": map[string]any{ - "type": "log", - "args": []any{map[string]any{"type": "string", "value": "hello world"}}, - "executionContextId": 1, - }, + t.Run("console_log", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.consoleAPICalled", + "params": map[string]any{ + "type": "log", + "args": []any{map[string]any{"type": "string", "value": "hello world"}}, + }, + }) + ev := ec.waitFor(t, "console_log", 2*time.Second) + assert.Equal(t, events.CategoryConsole, ev.Category) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "Runtime.consoleAPICalled", ev.Source.Event) + assert.Equal(t, events.DetailStandard, ev.DetailLevel) + + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "log", data["level"]) + assert.Equal(t, "hello world", data["text"]) }) - ev := waitForEvent(t, published, mu, "console_log", 2*time.Second) - assert.Equal(t, events.CategoryConsole, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "Runtime.consoleAPICalled", ev.Source.Event) - assert.Equal(t, events.DetailStandard, ev.DetailLevel) - var data map[string]any - require.NoError(t, json.Unmarshal(ev.Data, &data)) - assert.Equal(t, "log", data["level"]) - assert.Equal(t, "hello world", data["text"]) - // 2. 
exceptionThrown → console_error - srv.sendToMonitor(t, map[string]any{ - "method": "Runtime.exceptionThrown", - "params": map[string]any{ - "timestamp": 1234.5, - "exceptionDetails": map[string]any{ - "text": "Uncaught TypeError", - "lineNumber": 42, - "columnNumber": 7, - "url": "https://example.com/app.js", + t.Run("exception_thrown", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.exceptionThrown", + "params": map[string]any{ + "timestamp": 1234.5, + "exceptionDetails": map[string]any{ + "text": "Uncaught TypeError", + "lineNumber": 42, + "columnNumber": 7, + "url": "https://example.com/app.js", + }, }, - }, + }) + ev := ec.waitFor(t, "console_error", 2*time.Second) + assert.Equal(t, events.CategoryConsole, ev.Category) + assert.Equal(t, events.DetailStandard, ev.DetailLevel) + + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "Uncaught TypeError", data["text"]) + assert.Equal(t, float64(42), data["line"]) }) - ev2 := waitForEvent(t, published, mu, "console_error", 2*time.Second) - assert.Equal(t, events.CategoryConsole, ev2.Category) - assert.Equal(t, events.KindCDP, ev2.Source.Kind) - assert.Equal(t, "Runtime.exceptionThrown", ev2.Source.Event) - assert.Equal(t, events.DetailStandard, ev2.DetailLevel) - var data2 map[string]any - require.NoError(t, json.Unmarshal(ev2.Data, &data2)) - assert.Equal(t, "Uncaught TypeError", data2["text"]) - assert.Equal(t, float64(42), data2["line"]) - assert.Equal(t, float64(7), data2["column"]) - - // 3. 
Runtime.bindingCalled → interaction_click (via __kernelEvent binding) - srv.sendToMonitor(t, map[string]any{ - "method": "Runtime.bindingCalled", - "params": map[string]any{ - "name": "__kernelEvent", - "payload": `{"type":"interaction_click","x":10,"y":20,"selector":"button","tag":"BUTTON","text":"OK"}`, - }, + + t.Run("non_string_args", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.consoleAPICalled", + "params": map[string]any{ + "type": "log", + "args": []any{ + map[string]any{"type": "number", "value": 42}, + map[string]any{"type": "object", "value": map[string]any{"key": "val"}}, + map[string]any{"type": "undefined"}, + }, + }, + }) + ev := ec.waitForNew(t, "console_log", 2*time.Second) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + args := data["args"].([]any) + assert.Equal(t, "42", args[0]) + assert.Contains(t, args[1], "key") + assert.Equal(t, "undefined", args[2]) }) - ev3 := waitForEvent(t, published, mu, "interaction_click", 2*time.Second) - assert.Equal(t, events.CategoryInteraction, ev3.Category) - assert.Equal(t, "Runtime.bindingCalled", ev3.Source.Event) } -// TestNetworkEvents verifies network_request, network_response, and network_loading_failed. func TestNetworkEvents(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - published := make([]events.Event, 0, 32) - var mu sync.Mutex - upstream := newFakeUpstream(srv.wsURL()) - m := New(upstream, func(ev events.Event) { - mu.Lock() - published = append(published, ev) - mu.Unlock() - }, 99) - ctx := context.Background() - require.NoError(t, m.Start(ctx)) - defer m.Stop() - - // Responder goroutine: answer all commands from the monitor. - // For Network.getResponseBody, return a real body; for everything else return {}. 
- stopResponder := make(chan struct{}) - defer close(stopResponder) - go func() { - for { - select { - case b := <-srv.msgCh: - var msg cdpMessage - if err := json.Unmarshal(b, &msg); err != nil { - continue - } - if msg.ID == 0 { - continue - } - srv.connMu.Lock() - c := srv.conn - srv.connMu.Unlock() - if c == nil { - continue - } - var resp any - if msg.Method == "Network.getResponseBody" { - resp = map[string]any{ - "id": msg.ID, - "result": map[string]any{"body": `{"ok":true}`, "base64Encoded": false}, - } - } else { - resp = map[string]any{"id": msg.ID, "result": map[string]any{}} - } - _ = wsjson.Write(context.Background(), c, resp) - case <-stopResponder: - return + // Custom responder: return a body for Network.getResponseBody. + responder := func(msg cdpMessage) any { + if msg.Method == "Network.getResponseBody" { + return map[string]any{ + "id": msg.ID, + "result": map[string]any{"body": `{"ok":true}`, "base64Encoded": false}, } } - }() - - // Wait for connection. - require.Eventually(t, func() bool { - srv.connMu.Lock() - defer srv.connMu.Unlock() - return srv.conn != nil - }, 3*time.Second, 20*time.Millisecond) - time.Sleep(150 * time.Millisecond) - - const reqID = "req-001" + return nil + } + _, ec, cleanup := startMonitor(t, srv, responder) + defer cleanup() - // 1. 
requestWillBeSent → network_request - srv.sendToMonitor(t, map[string]any{ - "method": "Network.requestWillBeSent", - "params": map[string]any{ - "requestId": reqID, - "resourceType": "XHR", - "request": map[string]any{ - "method": "POST", - "url": "https://api.example.com/data", - "headers": map[string]any{"Content-Type": "application/json"}, + t.Run("request_and_response", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Network.requestWillBeSent", + "params": map[string]any{ + "requestId": "req-001", + "resourceType": "XHR", + "request": map[string]any{ + "method": "POST", + "url": "https://api.example.com/data", + "headers": map[string]any{"Content-Type": "application/json"}, + }, + "initiator": map[string]any{"type": "script"}, }, - "initiator": map[string]any{"type": "script"}, - }, - }) - ev := waitForEvent(t, &published, &mu, "network_request", 2*time.Second) - assert.Equal(t, events.CategoryNetwork, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "Network.requestWillBeSent", ev.Source.Event) - var data map[string]any - require.NoError(t, json.Unmarshal(ev.Data, &data)) - assert.Equal(t, "POST", data["method"]) - assert.Equal(t, "https://api.example.com/data", data["url"]) - - // 2. 
responseReceived + loadingFinished → network_response (with body via getResponseBody) - srv.sendToMonitor(t, map[string]any{ - "method": "Network.responseReceived", - "params": map[string]any{ - "requestId": reqID, - "response": map[string]any{ - "status": 200, - "statusText": "OK", - "url": "https://api.example.com/data", - "headers": map[string]any{"Content-Type": "application/json"}, - "mimeType": "application/json", + }) + ev := ec.waitFor(t, "network_request", 2*time.Second) + assert.Equal(t, events.CategoryNetwork, ev.Category) + assert.Equal(t, "Network.requestWillBeSent", ev.Source.Event) + + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "POST", data["method"]) + assert.Equal(t, "https://api.example.com/data", data["url"]) + + // Complete the request lifecycle. + srv.sendToMonitor(t, map[string]any{ + "method": "Network.responseReceived", + "params": map[string]any{ + "requestId": "req-001", + "response": map[string]any{ + "status": 200, "statusText": "OK", + "headers": map[string]any{"Content-Type": "application/json"}, "mimeType": "application/json", + }, }, - }, - }) - srv.sendToMonitor(t, map[string]any{ - "method": "Network.loadingFinished", - "params": map[string]any{ - "requestId": reqID, - }, - }) + }) + srv.sendToMonitor(t, map[string]any{ + "method": "Network.loadingFinished", + "params": map[string]any{"requestId": "req-001"}, + }) - ev2 := waitForEvent(t, &published, &mu, "network_response", 3*time.Second) - assert.Equal(t, events.CategoryNetwork, ev2.Category) - assert.Equal(t, "Network.loadingFinished", ev2.Source.Event) - var data2 map[string]any - require.NoError(t, json.Unmarshal(ev2.Data, &data2)) - assert.Equal(t, float64(200), data2["status"]) - assert.NotEmpty(t, data2["body"]) + ev2 := ec.waitFor(t, "network_response", 3*time.Second) + assert.Equal(t, "Network.loadingFinished", ev2.Source.Event) + var data2 map[string]any + require.NoError(t, json.Unmarshal(ev2.Data, &data2)) + 
assert.Equal(t, float64(200), data2["status"]) + assert.NotEmpty(t, data2["body"]) + }) - // 3. loadingFailed → network_loading_failed - const reqID2 = "req-002" - srv.sendToMonitor(t, map[string]any{ - "method": "Network.requestWillBeSent", - "params": map[string]any{ - "requestId": reqID2, - "request": map[string]any{ - "method": "GET", - "url": "https://fail.example.com/", + t.Run("loading_failed", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Network.requestWillBeSent", + "params": map[string]any{ + "requestId": "req-002", + "request": map[string]any{"method": "GET", "url": "https://fail.example.com/"}, }, - }, + }) + ec.waitForNew(t, "network_request", 2*time.Second) + + srv.sendToMonitor(t, map[string]any{ + "method": "Network.loadingFailed", + "params": map[string]any{ + "requestId": "req-002", + "errorText": "net::ERR_CONNECTION_REFUSED", + "canceled": false, + }, + }) + ev := ec.waitFor(t, "network_loading_failed", 2*time.Second) + assert.Equal(t, events.CategoryNetwork, ev.Category) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "net::ERR_CONNECTION_REFUSED", data["error_text"]) }) - waitForEvent(t, &published, &mu, "network_request", 2*time.Second) - mu.Lock() - published = published[:0] - mu.Unlock() + t.Run("binary_resource_skips_body", func(t *testing.T) { + var getBodyCalled atomic.Bool + srv.sendToMonitor(t, map[string]any{ + "method": "Network.requestWillBeSent", + "params": map[string]any{ + "requestId": "img-001", + "resourceType": "Image", + "request": map[string]any{"method": "GET", "url": "https://example.com/photo.png"}, + }, + }) + srv.sendToMonitor(t, map[string]any{ + "method": "Network.responseReceived", + "params": map[string]any{ + "requestId": "img-001", + "response": map[string]any{"status": 200, "statusText": "OK", "headers": map[string]any{}, "mimeType": "image/png"}, + }, + }) + srv.sendToMonitor(t, map[string]any{ + "method": "Network.loadingFinished", + 
"params": map[string]any{"requestId": "img-001"}, + }) - srv.sendToMonitor(t, map[string]any{ - "method": "Network.loadingFailed", - "params": map[string]any{ - "requestId": reqID2, - "errorText": "net::ERR_CONNECTION_REFUSED", - "canceled": false, - }, + ev := ec.waitForNew(t, "network_response", 3*time.Second) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.Equal(t, "", data["body"], "binary resource should have empty body") + assert.False(t, getBodyCalled.Load(), "should not call getResponseBody for images") }) - ev3 := waitForEvent(t, &published, &mu, "network_loading_failed", 2*time.Second) - assert.Equal(t, events.CategoryNetwork, ev3.Category) - var data3 map[string]any - require.NoError(t, json.Unmarshal(ev3.Data, &data3)) - assert.Equal(t, "net::ERR_CONNECTION_REFUSED", data3["error_text"]) } -// TestPageEvents verifies navigation, dom_content_loaded, page_load, and dom_updated. func TestPageEvents(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + _, ec, cleanup := startMonitor(t, srv, nil) defer cleanup() - // frameNavigated → navigation srv.sendToMonitor(t, map[string]any{ "method": "Page.frameNavigated", "params": map[string]any{ - "frame": map[string]any{ - "id": "frame-1", - "url": "https://example.com/page", - }, + "frame": map[string]any{"id": "frame-1", "url": "https://example.com/page"}, }, }) - ev := waitForEvent(t, published, mu, "navigation", 2*time.Second) + ev := ec.waitFor(t, "navigation", 2*time.Second) assert.Equal(t, events.CategoryPage, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) assert.Equal(t, "Page.frameNavigated", ev.Source.Event) var data map[string]any require.NoError(t, json.Unmarshal(ev.Data, &data)) assert.Equal(t, "https://example.com/page", data["url"]) - // domContentEventFired → dom_content_loaded srv.sendToMonitor(t, map[string]any{ "method": "Page.domContentEventFired", "params": 
map[string]any{"timestamp": 1000.0}, }) - ev2 := waitForEvent(t, published, mu, "dom_content_loaded", 2*time.Second) + ev2 := ec.waitFor(t, "dom_content_loaded", 2*time.Second) assert.Equal(t, events.CategoryPage, ev2.Category) + assert.Equal(t, events.DetailMinimal, ev2.DetailLevel) - // loadEventFired → page_load srv.sendToMonitor(t, map[string]any{ "method": "Page.loadEventFired", "params": map[string]any{"timestamp": 1001.0}, }) - ev3 := waitForEvent(t, published, mu, "page_load", 2*time.Second) + ev3 := ec.waitFor(t, "page_load", 2*time.Second) assert.Equal(t, events.CategoryPage, ev3.Category) + assert.Equal(t, events.DetailMinimal, ev3.DetailLevel) - // documentUpdated → dom_updated srv.sendToMonitor(t, map[string]any{ "method": "DOM.documentUpdated", "params": map[string]any{}, }) - ev4 := waitForEvent(t, published, mu, "dom_updated", 2*time.Second) + ev4 := ec.waitFor(t, "dom_updated", 2*time.Second) assert.Equal(t, events.CategoryPage, ev4.Category) + assert.Equal(t, events.DetailMinimal, ev4.DetailLevel) } -// TestTargetEvents verifies target_created and target_destroyed. 
func TestTargetEvents(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + _, ec, cleanup := startMonitor(t, srv, nil) defer cleanup() - // targetCreated → target_created srv.sendToMonitor(t, map[string]any{ "method": "Target.targetCreated", "params": map[string]any{ - "targetInfo": map[string]any{ - "targetId": "target-1", - "type": "page", - "url": "https://new.example.com", - }, + "targetInfo": map[string]any{"targetId": "t-1", "type": "page", "url": "https://new.example.com"}, }, }) - ev := waitForEvent(t, published, mu, "target_created", 2*time.Second) + ev := ec.waitFor(t, "target_created", 2*time.Second) assert.Equal(t, events.CategoryPage, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "Target.targetCreated", ev.Source.Event) + assert.Equal(t, events.DetailMinimal, ev.DetailLevel) var data map[string]any require.NoError(t, json.Unmarshal(ev.Data, &data)) - assert.Equal(t, "target-1", data["target_id"]) + assert.Equal(t, "t-1", data["target_id"]) - // targetDestroyed → target_destroyed srv.sendToMonitor(t, map[string]any{ "method": "Target.targetDestroyed", - "params": map[string]any{ - "targetId": "target-1", - }, + "params": map[string]any{"targetId": "t-1"}, }) - ev2 := waitForEvent(t, published, mu, "target_destroyed", 2*time.Second) + ev2 := ec.waitFor(t, "target_destroyed", 2*time.Second) assert.Equal(t, events.CategoryPage, ev2.Category) - var data2 map[string]any - require.NoError(t, json.Unmarshal(ev2.Data, &data2)) - assert.Equal(t, "target-1", data2["target_id"]) + assert.Equal(t, events.DetailMinimal, ev2.DetailLevel) } -// TestBindingAndTimeline verifies that scroll_settled arrives via -// Runtime.bindingCalled and layout_shift arrives via PerformanceTimeline. 
func TestBindingAndTimeline(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - _, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + _, ec, cleanup := startMonitor(t, srv, nil) defer cleanup() - // scroll_settled via Runtime.bindingCalled - srv.sendToMonitor(t, map[string]any{ - "method": "Runtime.bindingCalled", - "params": map[string]any{ - "name": "__kernelEvent", - "payload": `{"type":"scroll_settled","from_x":0,"from_y":0,"to_x":0,"to_y":500,"target_selector":"body"}`, - }, + t.Run("interaction_click", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.bindingCalled", + "params": map[string]any{ + "name": "__kernelEvent", + "payload": `{"type":"interaction_click","x":10,"y":20,"selector":"button","tag":"BUTTON","text":"OK"}`, + }, + }) + ev := ec.waitFor(t, "interaction_click", 2*time.Second) + assert.Equal(t, events.CategoryInteraction, ev.Category) + assert.Equal(t, "Runtime.bindingCalled", ev.Source.Event) }) - ev := waitForEvent(t, published, mu, "scroll_settled", 2*time.Second) - assert.Equal(t, events.CategoryInteraction, ev.Category) - assert.Equal(t, "Runtime.bindingCalled", ev.Source.Event) - var data map[string]any - require.NoError(t, json.Unmarshal(ev.Data, &data)) - assert.Equal(t, float64(500), data["to_y"]) - // layout_shift via PerformanceTimeline.timelineEventAdded - srv.sendToMonitor(t, map[string]any{ - "method": "PerformanceTimeline.timelineEventAdded", - "params": map[string]any{ - "event": map[string]any{ - "type": "layout-shift", + t.Run("scroll_settled", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.bindingCalled", + "params": map[string]any{ + "name": "__kernelEvent", + "payload": `{"type":"scroll_settled","from_x":0,"from_y":0,"to_x":0,"to_y":500,"target_selector":"body"}`, }, - }, + }) + ev := ec.waitFor(t, "scroll_settled", 2*time.Second) + assert.Equal(t, events.CategoryInteraction, ev.Category) + var data map[string]any + require.NoError(t, 
json.Unmarshal(ev.Data, &data)) + assert.Equal(t, float64(500), data["to_y"]) }) - ev2 := waitForEvent(t, published, mu, "layout_shift", 2*time.Second) - assert.Equal(t, events.KindCDP, ev2.Source.Kind) - assert.Equal(t, "PerformanceTimeline.timelineEventAdded", ev2.Source.Event) - noEventWithin(t, published, mu, "console_log", 100*time.Millisecond) + t.Run("layout_shift", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "PerformanceTimeline.timelineEventAdded", + "params": map[string]any{ + "event": map[string]any{"type": "layout-shift"}, + }, + }) + ev := ec.waitFor(t, "layout_shift", 2*time.Second) + assert.Equal(t, events.KindCDP, ev.Source.Kind) + assert.Equal(t, "PerformanceTimeline.timelineEventAdded", ev.Source.Event) + }) + + t.Run("unknown_binding_ignored", func(t *testing.T) { + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.bindingCalled", + "params": map[string]any{ + "name": "someOtherBinding", + "payload": `{"type":"interaction_click"}`, + }, + }) + ec.assertNone(t, "interaction_click", 100*time.Millisecond) + }) } -// TestScreenshot verifies rate limiting and the screenshotFn testable seam. func TestScreenshot(t *testing.T) { srv := newFakeCDPServer(t) defer srv.close() - m, published, mu, cleanup := startMonitorWithFakeServer(t, srv) + m, ec, cleanup := startMonitor(t, srv, nil) defer cleanup() - // Inject a mock screenshotFn that returns a tiny valid PNG. var captureCount atomic.Int32 - // 1x1 white PNG (minimal valid PNG bytes) minimalPNG := []byte{ - 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature - 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk length + type - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // width=1, height=1 - 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, // bit depth=8, color type=2, ... 
- 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, // IDAT chunk + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, + 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, + 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, 0x54, 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0xe2, 0x21, 0xbc, - 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, // IEND chunk + 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82, } m.screenshotFn = func(ctx context.Context, displayNum int) ([]byte, error) { @@ -863,280 +805,226 @@ func TestScreenshot(t *testing.T) { return minimalPNG, nil } - // First maybeScreenshot call — should capture. - ctx := context.Background() - m.maybeScreenshot(ctx) - // Give the goroutine time to run. - require.Eventually(t, func() bool { - return captureCount.Load() == 1 - }, 2*time.Second, 20*time.Millisecond) - - // Second call immediately after — should be rate-limited (no capture). - m.maybeScreenshot(ctx) - time.Sleep(100 * time.Millisecond) - assert.Equal(t, int32(1), captureCount.Load(), "second call within 2s should be rate-limited") - - // Verify screenshot event was published with png field. - ev := waitForEvent(t, published, mu, "screenshot", 2*time.Second) - assert.Equal(t, events.CategorySystem, ev.Category) - assert.Equal(t, events.KindLocalProcess, ev.Source.Kind) - var data map[string]any - require.NoError(t, json.Unmarshal(ev.Data, &data)) - assert.NotEmpty(t, data["png"]) + t.Run("capture_and_publish", func(t *testing.T) { + m.maybeScreenshot(context.Background()) + require.Eventually(t, func() bool { return captureCount.Load() == 1 }, 2*time.Second, 20*time.Millisecond) - // Fast-forward lastScreenshotAt to simulate 2s+ elapsed. 
- m.lastScreenshotAt.Store(time.Now().Add(-3 * time.Second).UnixMilli()) - m.maybeScreenshot(ctx) - require.Eventually(t, func() bool { - return captureCount.Load() == 2 - }, 2*time.Second, 20*time.Millisecond) -} + ev := ec.waitFor(t, "screenshot", 2*time.Second) + assert.Equal(t, events.CategorySystem, ev.Category) + assert.Equal(t, events.KindLocalProcess, ev.Source.Kind) + var data map[string]any + require.NoError(t, json.Unmarshal(ev.Data, &data)) + assert.NotEmpty(t, data["png"]) + }) -// --- Computed meta-event tests --- + t.Run("rate_limited", func(t *testing.T) { + before := captureCount.Load() + m.maybeScreenshot(context.Background()) + time.Sleep(100 * time.Millisecond) + assert.Equal(t, before, captureCount.Load(), "should be rate-limited within 2s") + }) -// newComputedMonitor creates a Monitor with a capture function and returns -// the published events slice and its mutex for inspection. -func newComputedMonitor(t *testing.T) (*Monitor, *[]events.Event, *sync.Mutex) { - t.Helper() - var mu sync.Mutex - published := make([]events.Event, 0) - publishFn := func(ev events.Event) { - mu.Lock() - published = append(published, ev) - mu.Unlock() - } - upstream := newFakeUpstream("ws://127.0.0.1:0") // not used; no real dial - m := New(upstream, publishFn, 0) - return m, &published, &mu + t.Run("captures_after_cooldown", func(t *testing.T) { + m.lastScreenshotAt.Store(time.Now().Add(-3 * time.Second).UnixMilli()) + before := captureCount.Load() + m.maybeScreenshot(context.Background()) + require.Eventually(t, func() bool { return captureCount.Load() > before }, 2*time.Second, 20*time.Millisecond) + }) } +func TestAttachExistingTargets(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() -// noEventWithin asserts that no event of the given type is published within d. 
-func noEventWithin(t *testing.T, published *[]events.Event, mu *sync.Mutex, eventType string, d time.Duration) { - t.Helper() - deadline := time.Now().Add(d) - for time.Now().Before(deadline) { - mu.Lock() - for _, ev := range *published { - if ev.Type == eventType { - mu.Unlock() - t.Fatalf("unexpected event %q published", eventType) + responder := func(msg cdpMessage) any { + srv.connMu.Lock() + c := srv.conn + srv.connMu.Unlock() + switch msg.Method { + case "Target.getTargets": + return map[string]any{ + "id": msg.ID, + "result": map[string]any{ + "targetInfos": []any{ + map[string]any{"targetId": "existing-1", "type": "page", "url": "https://preexisting.example.com"}, + }, + }, } + case "Target.attachToTarget": + if c != nil { + _ = wsjson.Write(context.Background(), c, map[string]any{ + "method": "Target.attachedToTarget", + "params": map[string]any{ + "sessionId": "session-existing-1", + "targetInfo": map[string]any{"targetId": "existing-1", "type": "page", "url": "https://preexisting.example.com"}, + }, + }) + } + return map[string]any{"id": msg.ID, "result": map[string]any{"sessionId": "session-existing-1"}} } - mu.Unlock() - time.Sleep(10 * time.Millisecond) + return nil } + + m, _, cleanup := startMonitor(t, srv, responder) + defer cleanup() + + require.Eventually(t, func() bool { + m.sessionsMu.RLock() + defer m.sessionsMu.RUnlock() + _, ok := m.sessions["session-existing-1"] + return ok + }, 3*time.Second, 50*time.Millisecond, "existing target not auto-attached") + + m.sessionsMu.RLock() + info := m.sessions["session-existing-1"] + m.sessionsMu.RUnlock() + assert.Equal(t, "existing-1", info.targetID) } -// TestNetworkIdle verifies the 500ms debounce for network_idle. -func TestNetworkIdle(t *testing.T) { - m, published, mu := newComputedMonitor(t) +func TestURLPopulated(t *testing.T) { + srv := newFakeCDPServer(t) + defer srv.close() + + _, ec, cleanup := startMonitor(t, srv, nil) + defer cleanup() - // Simulate navigation (resets computed state). 
- navParams, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + srv.sendToMonitor(t, map[string]any{ + "method": "Page.frameNavigated", + "params": map[string]any{ + "frame": map[string]any{"id": "f1", "url": "https://example.com/page"}, + }, }) - m.handleFrameNavigated(navParams, "s1") - // Drain the navigation event from published. - - // Helper to send requestWillBeSent. - sendReq := func(id string) { - p, _ := json.Marshal(map[string]any{ - "requestId": id, - "resourceType": "Document", - "request": map[string]any{"method": "GET", "url": "https://example.com/" + id}, - }) - m.handleNetworkRequest(p, "s1") - } - // Helper to send loadingFinished. - sendFinished := func(id string) { - // store minimal state so LoadAndDelete finds it - m.pendReqMu.Lock() - m.pendingRequests[id] = networkReqState{method: "GET", url: "https://example.com/" + id} - m.pendReqMu.Unlock() - p, _ := json.Marshal(map[string]any{"requestId": id}) - m.handleLoadingFinished(p, "s1") - } + ec.waitFor(t, "navigation", 2*time.Second) - // Send 3 requests, then finish them all. - sendReq("r1") - sendReq("r2") - sendReq("r3") - - t0 := time.Now() - sendFinished("r1") - sendFinished("r2") - sendFinished("r3") - - // network_idle should fire ~500ms after the last loadingFinished. 
- ev := waitForEvent(t,published, mu, "network_idle", 2*time.Second) - elapsed := time.Since(t0) - assert.GreaterOrEqual(t, elapsed.Milliseconds(), int64(400), "network_idle fired too early") - assert.Equal(t, events.CategoryNetwork, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "", ev.Source.Event) - - // --- Timer reset test: new request within 500ms resets the clock --- - m2, published2, mu2 := newComputedMonitor(t) - navParams2, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, + srv.sendToMonitor(t, map[string]any{ + "method": "Runtime.consoleAPICalled", + "params": map[string]any{ + "type": "log", + "args": []any{map[string]any{"type": "string", "value": "test"}}, + }, }) - m2.handleFrameNavigated(navParams2, "s1") + ev := ec.waitFor(t, "console_log", 2*time.Second) + assert.Equal(t, "https://example.com/page", ev.URL) +} - sendReq2 := func(id string) { - p, _ := json.Marshal(map[string]any{ - "requestId": id, - "resourceType": "Document", - "request": map[string]any{"method": "GET", "url": "https://example.com/" + id}, - }) - m2.handleNetworkRequest(p, "s1") - } - sendFinished2 := func(id string) { - m2.pendReqMu.Lock() - m2.pendingRequests[id] = networkReqState{method: "GET", url: "https://example.com/" + id} - m2.pendReqMu.Unlock() - p, _ := json.Marshal(map[string]any{"requestId": id}) - m2.handleLoadingFinished(p, "s1") - } +// --------------------------------------------------------------------------- +// Computed meta-event tests — use direct handler calls, no websocket needed. 
+// --------------------------------------------------------------------------- - sendReq2("a1") - sendFinished2("a1") - // 200ms later, a new request starts (timer should reset) - time.Sleep(200 * time.Millisecond) - sendReq2("a2") - t1 := time.Now() - sendFinished2("a2") - - ev2 := waitForEvent(t,published2, mu2, "network_idle", 2*time.Second) - elapsed2 := time.Since(t1) - // Should fire ~500ms after a2 finished, not 500ms after a1 - assert.GreaterOrEqual(t, elapsed2.Milliseconds(), int64(400), "network_idle should reset timer on new request") - assert.Equal(t, events.CategoryNetwork, ev2.Category) +// simulateRequest sends a Network.requestWillBeSent through the handler. +func simulateRequest(m *Monitor, id string) { + p, _ := json.Marshal(map[string]any{ + "requestId": id, "resourceType": "Document", + "request": map[string]any{"method": "GET", "url": "https://example.com/" + id}, + }) + m.handleNetworkRequest(p, "s1") } -// TestLayoutSettled verifies the 1s debounce for layout_settled. -func TestLayoutSettled(t *testing.T) { - m, published, mu := newComputedMonitor(t) +// simulateFinished stores minimal state and sends Network.loadingFinished. +func simulateFinished(m *Monitor, id string) { + m.pendReqMu.Lock() + m.pendingRequests[id] = networkReqState{method: "GET", url: "https://example.com/" + id} + m.pendReqMu.Unlock() + p, _ := json.Marshal(map[string]any{"requestId": id}) + m.handleLoadingFinished(p, "s1") +} - // Navigate to reset state. 
- navParams, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, +func TestNetworkIdle(t *testing.T) { + t.Run("debounce_500ms", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") + + simulateRequest(m, "r1") + simulateRequest(m, "r2") + simulateRequest(m, "r3") + + t0 := time.Now() + simulateFinished(m, "r1") + simulateFinished(m, "r2") + simulateFinished(m, "r3") + + ev := ec.waitFor(t, "network_idle", 2*time.Second) + assert.GreaterOrEqual(t, time.Since(t0).Milliseconds(), int64(400), "fired too early") + assert.Equal(t, events.CategoryNetwork, ev.Category) }) - m.handleFrameNavigated(navParams, "s1") - // Simulate page_load (Page.loadEventFired). - // We bypass the ffmpeg screenshot side-effect by keeping screenshotFn nil-safe. - t0 := time.Now() - m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + t.Run("timer_reset_on_new_request", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") - // layout_settled should fire ~1s after page_load (no layout shifts). - ev := waitForEvent(t,published, mu, "layout_settled", 3*time.Second) - elapsed := time.Since(t0) - assert.GreaterOrEqual(t, elapsed.Milliseconds(), int64(900), "layout_settled fired too early") - assert.Equal(t, events.CategoryPage, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "", ev.Source.Event) + simulateRequest(m, "a1") + simulateFinished(m, "a1") + time.Sleep(200 * time.Millisecond) - // --- Layout shift resets the timer --- - m2, published2, mu2 := newComputedMonitor(t) - navParams2, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, - }) - m2.handleFrameNavigated(navParams2, "s1") - m2.handleLoadEventFired(json.RawMessage(`{}`), "s1") + simulateRequest(m, "a2") + t1 := time.Now() + simulateFinished(m, "a2") - // Simulate a native CDP layout shift at 600ms. 
- time.Sleep(600 * time.Millisecond) - shiftParams, _ := json.Marshal(map[string]any{ - "event": map[string]any{"type": "layout-shift"}, + ec.waitFor(t, "network_idle", 2*time.Second) + assert.GreaterOrEqual(t, time.Since(t1).Milliseconds(), int64(400), "should reset timer on new request") }) - m2.handleTimelineEvent(shiftParams, "s1") - t1 := time.Now() - - // layout_settled fires ~1s after the shift, not 1s after page_load. - ev2 := waitForEvent(t,published2, mu2, "layout_settled", 3*time.Second) - elapsed2 := time.Since(t1) - assert.GreaterOrEqual(t, elapsed2.Milliseconds(), int64(900), "layout_settled should reset after layout_shift") - assert.Equal(t, events.CategoryPage, ev2.Category) } -// TestScrollSettled verifies that a scroll_settled sentinel from JS is passed through. -func TestScrollSettled(t *testing.T) { - m, published, mu := newComputedMonitor(t) +func TestLayoutSettled(t *testing.T) { + t.Run("debounce_1s_after_page_load", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") + + t0 := time.Now() + m.handleLoadEventFired(json.RawMessage(`{}`), "s1") - // Simulate scroll_settled via Runtime.bindingCalled. 
- bindingParams, _ := json.Marshal(map[string]any{ - "name": "__kernelEvent", - "payload": `{"type":"scroll_settled"}`, + ev := ec.waitFor(t, "layout_settled", 3*time.Second) + assert.GreaterOrEqual(t, time.Since(t0).Milliseconds(), int64(900), "fired too early") + assert.Equal(t, events.CategoryPage, ev.Category) }) - m.handleBindingCalled(bindingParams, "s1") - ev := waitForEvent(t,published, mu, "scroll_settled", 1*time.Second) - assert.Equal(t, events.CategoryInteraction, ev.Category) + t.Run("layout_shift_resets_timer", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") + m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + + time.Sleep(600 * time.Millisecond) + shiftParams, _ := json.Marshal(map[string]any{ + "event": map[string]any{"type": "layout-shift"}, + }) + m.handleTimelineEvent(shiftParams, "s1") + t1 := time.Now() + + ec.waitFor(t, "layout_settled", 3*time.Second) + assert.GreaterOrEqual(t, time.Since(t1).Milliseconds(), int64(900), "should reset after layout_shift") + }) } -// TestNavigationSettled verifies the three-flag gate for navigation_settled. func TestNavigationSettled(t *testing.T) { - m, published, mu := newComputedMonitor(t) + t.Run("fires_when_all_three_flags_set", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") - // Navigate to initialise flags. - navParams, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, - }) - m.handleFrameNavigated(navParams, "s1") + m.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") - // Trigger dom_content_loaded. - m.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") + // Trigger network_idle. + simulateRequest(m, "r1") + simulateFinished(m, "r1") - // Trigger network_idle via load cycle. 
- reqP, _ := json.Marshal(map[string]any{ - "requestId": "r1", "resourceType": "Document", - "request": map[string]any{"method": "GET", "url": "https://example.com/r1"}, + // Trigger layout_settled via page_load. + m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + + ev := ec.waitFor(t, "navigation_settled", 3*time.Second) + assert.Equal(t, events.CategoryPage, ev.Category) }) - m.handleNetworkRequest(reqP, "s1") - m.pendReqMu.Lock() - m.pendingRequests["r1"] = networkReqState{method: "GET", url: "https://example.com/r1"} - m.pendReqMu.Unlock() - finP, _ := json.Marshal(map[string]any{"requestId": "r1"}) - m.handleLoadingFinished(finP, "s1") - // Trigger layout_settled via page_load (1s timer). - m.handleLoadEventFired(json.RawMessage(`{}`), "s1") + t.Run("interrupted_by_new_navigation", func(t *testing.T) { + m, ec := newComputedMonitor(t) + navigateMonitor(m, "https://example.com") - // Wait for navigation_settled (all three flags set). - ev := waitForEvent(t,published, mu, "navigation_settled", 3*time.Second) - assert.Equal(t, events.CategoryPage, ev.Category) - assert.Equal(t, events.KindCDP, ev.Source.Kind) - assert.Equal(t, "", ev.Source.Event) + m.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") - // --- Navigation interrupt test --- - m2, published2, mu2 := newComputedMonitor(t) + simulateRequest(m, "r2") + simulateFinished(m, "r2") - navP1, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com"}, - }) - m2.handleFrameNavigated(navP1, "s1") + // Interrupt before layout_settled fires. + navigateMonitor(m, "https://example.com/page2") - // Start sequence: dom_content_loaded + network_idle. 
- m2.handleDOMContentLoaded(json.RawMessage(`{}`), "s1") - reqP2, _ := json.Marshal(map[string]any{ - "requestId": "r2", "resourceType": "Document", - "request": map[string]any{"method": "GET", "url": "https://example.com/r2"}, + ec.assertNone(t, "navigation_settled", 1500*time.Millisecond) }) - m2.handleNetworkRequest(reqP2, "s1") - m2.pendReqMu.Lock() - m2.pendingRequests["r2"] = networkReqState{method: "GET", url: "https://example.com/r2"} - m2.pendReqMu.Unlock() - finP2, _ := json.Marshal(map[string]any{"requestId": "r2"}) - m2.handleLoadingFinished(finP2, "s1") - - // Interrupt with a new navigation before layout_settled fires. - navP2, _ := json.Marshal(map[string]any{ - "frame": map[string]any{"id": "f1", "url": "https://example.com/page2"}, - }) - m2.handleFrameNavigated(navP2, "s1") - - // navigation_settled should NOT fire for the interrupted sequence. - noEventWithin(t, published2, mu2, "navigation_settled", 1500*time.Millisecond) - _ = mu2 // suppress unused warning }