diff --git a/adapter/admin_grpc.go b/adapter/admin_grpc.go new file mode 100644 index 00000000..cbca0b57 --- /dev/null +++ b/adapter/admin_grpc.go @@ -0,0 +1,479 @@ +package adapter + +import ( + "context" + "crypto/subtle" + "sort" + "strings" + "sync" + "time" + + "github.com/bootjp/elastickv/internal/raftengine" + pb "github.com/bootjp/elastickv/proto" + "github.com/cockroachdb/errors" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +// AdminGroup exposes per-Raft-group state to the Admin service. It is a narrow +// subset of raftengine.Engine so tests can supply an in-memory fake without +// standing up a real Raft cluster. Configuration is polled on each +// GetClusterOverview to pick up scale-out / scale-in events without the +// operator having to restart the admin binary. +type AdminGroup interface { + Status() raftengine.Status + Configuration(ctx context.Context) (raftengine.Configuration, error) +} + +// NodeIdentity is the value form of the protobuf NodeIdentity message used for +// AdminServer configuration. It avoids copying pb.NodeIdentity, which embeds a +// protoimpl.MessageState (and a mutex). +type NodeIdentity struct { + NodeID string + GRPCAddress string +} + +func (n NodeIdentity) toProto() *pb.NodeIdentity { + return &pb.NodeIdentity{NodeId: n.NodeID, GrpcAddress: n.GRPCAddress} +} + +// AdminServer implements the node-side Admin gRPC service described in +// docs/admin_ui_key_visualizer_design.md §4 (Layer A). Phase 0 only implements +// GetClusterOverview and GetRaftGroups; remaining RPCs return Unimplemented so +// the generated client can still compile against older nodes during rollout. +type AdminServer struct { + self NodeIdentity + members []NodeIdentity + + groupsMu sync.RWMutex + groups map[uint64]AdminGroup + + // now is the clock used for LastContactUnixMs and any other + // timestamping this service needs. It's a per-server field (not a + // package global) so `-race` tests that swap the clock on one server + // instance cannot contend with concurrent RPCs on another instance. + now func() time.Time + + pb.UnimplementedAdminServer +} + +// NewAdminServer constructs an AdminServer. `self` identifies the local node +// for responses that return node identity. `members` is the static membership +// snapshot shipped to the admin binary; callers that already have a membership +// source may pass nil and let the admin binary's fan-out layer discover peers +// by other means. +func NewAdminServer(self NodeIdentity, members []NodeIdentity) *AdminServer { + cloned := append([]NodeIdentity(nil), members...) + return &AdminServer{ + self: self, + members: cloned, + groups: make(map[uint64]AdminGroup), + now: time.Now, + } +} + +// SetClock overrides the clock used by GetRaftGroups, letting tests inject a +// fixed time without mutating any package-global state. Concurrent RPCs on +// other AdminServer instances are unaffected. +func (s *AdminServer) SetClock(now func() time.Time) { + if now == nil { + now = time.Now + } + s.groupsMu.Lock() + s.now = now + s.groupsMu.Unlock() +} + +// RegisterGroup binds a Raft group ID to its engine so the Admin service can +// report leader and log state for that group. +func (s *AdminServer) RegisterGroup(groupID uint64, g AdminGroup) { + if g == nil { + return + } + s.groupsMu.Lock() + s.groups[groupID] = g + s.groupsMu.Unlock() +} + +// GetClusterOverview returns the local node identity, the current member +// list, and per-group leader identity collected from the engines registered +// via RegisterGroup. The member list is the union of (a) the bootstrap seed +// supplied to NewAdminServer and (b) the live Configuration of every +// registered Raft group — the latter picks up scale-out nodes added after +// startup so the admin binary's fan-out discovery does not miss them. +func (s *AdminServer) GetClusterOverview( + ctx context.Context, + _ *pb.GetClusterOverviewRequest, +) (*pb.GetClusterOverviewResponse, error) { + leaders := s.snapshotLeaders() + members := s.snapshotMembers(ctx) + return &pb.GetClusterOverviewResponse{ + Self: s.self.toProto(), + Members: members, + GroupLeaders: leaders, + }, nil +} + +// snapshotMembers unions the seed members with the live Configuration of each +// registered group, preferring the live address when the same NodeID appears +// in both sources. A stale bootstrap entry cannot outvote a readdressed node: +// if n2 was moved from 10.0.0.12 to 10.0.0.22, the overview reports the +// current 10.0.0.22 so fan-out dials the right target. Configuration errors +// on a single group do not fail the RPC — other groups plus the seed list +// still produce useful output. +func (s *AdminServer) snapshotMembers(ctx context.Context) []*pb.NodeIdentity { + groups := s.cloneGroupsSorted() + live := collectLiveMembers(ctx, groups, s.self.NodeID) + mergeSeedMembers(s.members, s.self.NodeID, &live) + + out := make([]*pb.NodeIdentity, 0, len(live.order)) + for _, id := range live.order { + out = append(out, &pb.NodeIdentity{NodeId: id, GrpcAddress: live.addrByID[id]}) + } + return out +} + +// groupEntry pairs a Raft group ID with its AdminGroup so callers can iterate +// in a deterministic (ID-ascending) order. Sorting matters for +// collectLiveMembers: when two groups report the same NodeID with different +// addresses (e.g., mid-readdress), the iteration order picks which address +// wins, and a Go map's range order is unspecified. +type groupEntry struct { + id uint64 + group AdminGroup +} + +// cloneGroupsSorted snapshots the registered groups under the read lock and +// returns them sorted by group ID so iteration and tie-break decisions are +// stable across calls. +func (s *AdminServer) cloneGroupsSorted() []groupEntry { + s.groupsMu.RLock() + defer s.groupsMu.RUnlock() + out := make([]groupEntry, 0, len(s.groups)) + for id, g := range s.groups { + out = append(out, groupEntry{id: id, group: g}) + } + sort.Slice(out, func(i, j int) bool { return out[i].id < out[j].id }) + return out +} + +// collectLiveMembers polls Configuration for each group (in ascending group +// ID order supplied by the caller) and returns the union of server IDs +// (excluding self) with their live addresses. When two groups report the +// same server ID with different addresses — e.g. mid-readdress before every +// group has converged — the lowest-ID group wins, which is stable across +// calls and matches "trust the primary group" intuition. +// +// Entries with an empty `srv.Address` (the etcd engine can emit those when +// peer metadata is still missing) are skipped: storing a blank address would +// shadow a usable seed entry for the same NodeID and cause GetClusterOverview +// to drop the peer from fan-out altogether. Letting the seed list backfill +// keeps the peer reachable until the live Configuration converges. +// +// Per-group Configuration calls run concurrently because a sequential loop +// would stall the entire RPC behind any one slow group; results are written +// into a pre-allocated slice indexed by the sorted-order position so the +// merge step still walks groups in ascending-ID order and preserves the +// deterministic tie-break. +// configResult bundles a Configuration RPC outcome with its position in the +// caller-supplied groups slice so the merge step can re-sort by group-ID +// even when results land out of completion order. +type configResult struct { + i int + cfg raftengine.Configuration + err error +} + +// fanoutConfigurationCalls launches a Configuration(ctx) goroutine per +// group and collects results. Returns whatever has landed by the time ctx +// fires; remaining goroutines drain into the (buffered) channel and exit +// asynchronously when their per-RPC ctx unwinds. The early-return is the +// reason this lives in its own function: reading a shared []configResult +// slice across the cancel boundary would race the still-running goroutines. +// +// Each spawned goroutine checks ctx before issuing the RPC so a goroutine +// scheduled after the parent ctx already fired exits immediately instead +// of doing wasted gRPC work. After the RPC the goroutine drains its result +// without blocking thanks to the len(groups)-buffered channel. +// configFanoutMaxConcurrency caps how many Configuration polls run at the +// same time so a node hosting hundreds of Raft groups does not spawn a +// matching goroutine + gRPC burst on every GetClusterOverview. Sized to +// cover typical multi-raft deployments while keeping the goroutine / +// connection footprint bounded under load. Smaller than maxDiscoveredNodes +// (per-fanout target cap) on purpose: this is per-RPC concurrency, not +// total target count. +const configFanoutMaxConcurrency = 64 + +func fanoutConfigurationCalls(ctx context.Context, groups []groupEntry) []configResult { + resultsCh := make(chan configResult, len(groups)) + // sem bounds the number of goroutines actively running Configuration at + // once. We still spawn len(groups) goroutines total, but only + // configFanoutMaxConcurrency of them can be inside Configuration at the + // same time — the rest park on the semaphore acquire. Using buffered + // channel sends/receives as the semaphore avoids an extra dep. + sem := make(chan struct{}, configFanoutMaxConcurrency) + for i, entry := range groups { + go func(i int, entry groupEntry) { + // Bail out early if the parent already cancelled — avoids + // taking the semaphore + RPC path just to fail the call. + if err := ctx.Err(); err != nil { + resultsCh <- configResult{i: i, err: err} + return + } + select { + case sem <- struct{}{}: + case <-ctx.Done(): + resultsCh <- configResult{i: i, err: ctx.Err()} + return + } + defer func() { <-sem }() + cfg, err := entry.group.Configuration(ctx) + resultsCh <- configResult{i: i, cfg: cfg, err: err} + }(i, entry) + } + got := make([]configResult, 0, len(groups)) + for range groups { + select { + case res := <-resultsCh: + got = append(got, res) + case <-ctx.Done(): + return got + } + } + return got +} + +// liveMembers bundles the result of polling every Raft group's Configuration: +// addrByID lists the usable (non-blank) addresses, seenID is every NodeID any +// group reported (even with blank address) so seed backfill can distinguish +// "node still exists with bad metadata" from "node was removed", and +// authoritative is true only when EVERY group's Configuration succeeded +// (and at least one ran). A single group erroring or missing means the +// merged view is incomplete: a node only present in the failed group +// would otherwise be incorrectly treated as removed and dropped, so seed +// pruning must wait until live membership is proven across all groups. +type liveMembers struct { + addrByID map[string]string + seenID map[string]struct{} + order []string + authoritative bool +} + +func collectLiveMembers( + ctx context.Context, + groups []groupEntry, + selfID string, +) liveMembers { + got := fanoutConfigurationCalls(ctx, groups) + + // Merge in the original group-ID order so the lowest-ID-wins tie-break + // stays deterministic. (Completion order would otherwise depend on + // which Configuration() returned first.) + sort.Slice(got, func(a, b int) bool { return got[a].i < got[b].i }) + + live := liveMembers{ + addrByID: map[string]string{}, + seenID: map[string]struct{}{}, + order: []string{}, + } + successes := 0 + for _, res := range got { + if res.err != nil { + continue + } + successes++ + for _, srv := range res.cfg.Servers { + if srv.ID == "" || srv.ID == selfID { + continue + } + live.seenID[srv.ID] = struct{}{} + if srv.Address == "" { + // Known node with missing metadata — seed will backfill. + continue + } + if _, dup := live.addrByID[srv.ID]; dup { + continue + } + live.addrByID[srv.ID] = srv.Address + live.order = append(live.order, srv.ID) + } + } + // Authoritative only when every queried group reported successfully + // AND at least one group ran. If even a single Configuration call + // errored or the fanout returned early on ctx cancellation, we cannot + // distinguish "removed from cluster" from "the group that knew about + // this node was unreachable", so seeds must be allowed to fall through. + live.authoritative = successes > 0 && successes == len(groups) && len(got) == len(groups) + return live +} + +// mergeSeedMembers fills in seed entries against the live membership: +// +// - If the NodeID was seen in some live config but with a blank address, +// the seed supplies the address (handles the etcd convergence transient). +// - If the NodeID was not seen at all and the live result is authoritative +// (every queried group's Configuration succeeded), the seed is a removed +// node — drop it instead of re-advertising it forever. +// - Otherwise (cold start, partial failure, ctx cancellation, or every +// Configuration errored), fall back to the seed: a node only known to +// the failed group must not be silently dropped. +// +// Codex flagged two regressions in iterations of this function: +// (a) the original version re-added every removed seed, never converging on +// scale-in; (b) the round-24 fix flipped to drop on any single success, +// which dropped peers visible only in a partially-failing group. The +// current contract requires *every* group to report cleanly before the +// pruning path kicks in. +func mergeSeedMembers(seeds []NodeIdentity, selfID string, live *liveMembers) { + for _, m := range seeds { + if m.NodeID == "" || m.NodeID == selfID { + continue + } + if _, hasAddr := live.addrByID[m.NodeID]; hasAddr { + continue + } + _, seen := live.seenID[m.NodeID] + if !seen && live.authoritative { + // Live config is authoritative and doesn't know this node: + // it was removed via raft RemoveServer. Skip. + continue + } + live.addrByID[m.NodeID] = m.GRPCAddress + live.order = append(live.order, m.NodeID) + } +} + +// GetRaftGroups returns per-group state snapshots. Phase 0 wires commit/applied +// indices only; per-follower contact and term history land in later phases. +func (s *AdminServer) GetRaftGroups( + _ context.Context, + _ *pb.GetRaftGroupsRequest, +) (*pb.GetRaftGroupsResponse, error) { + s.groupsMu.RLock() + defer s.groupsMu.RUnlock() + ids := sortedGroupIDs(s.groups) + out := make([]*pb.RaftGroupState, 0, len(ids)) + now := s.now() + for _, id := range ids { + st := s.groups[id].Status() + // Translate LastContact (duration since the last contact with the + // leader, per raftengine.Status) into an absolute unix-ms so UI + // clients can diff against their own clock instead of having to + // reason about the server's uptime. The etcd engine returns a + // sentinel negative duration when contact is unknown (e.g., a + // follower that has never heard from a leader). Report that case + // as `LastContactUnixMs=0` (epoch) so the UI can render "unknown" + // / "never contacted" rather than treating it as "freshly + // contacted just now". + var lastContactUnixMs int64 + if st.LastContact >= 0 { + lastContactUnixMs = now.Add(-st.LastContact).UnixMilli() + } + out = append(out, &pb.RaftGroupState{ + RaftGroupId: id, + LeaderNodeId: st.Leader.ID, + LeaderTerm: st.Term, + CommitIndex: st.CommitIndex, + AppliedIndex: st.AppliedIndex, + LastContactUnixMs: lastContactUnixMs, + }) + } + return &pb.GetRaftGroupsResponse{Groups: out}, nil +} + +func (s *AdminServer) snapshotLeaders() []*pb.GroupLeader { + s.groupsMu.RLock() + defer s.groupsMu.RUnlock() + ids := sortedGroupIDs(s.groups) + out := make([]*pb.GroupLeader, 0, len(ids)) + for _, id := range ids { + st := s.groups[id].Status() + out = append(out, &pb.GroupLeader{ + RaftGroupId: id, + LeaderNodeId: st.Leader.ID, + LeaderTerm: st.Term, + }) + } + return out +} + +// sortedGroupIDs returns the map's keys in ascending order so Admin responses +// are deterministic across calls — admin tooling and tests both rely on stable +// ordering. +func sortedGroupIDs(m map[uint64]AdminGroup) []uint64 { + ids := make([]uint64, 0, len(m)) + for id := range m { + ids = append(ids, id) + } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) + return ids +} + +// adminMethodPrefix is "/Admin/" today but is derived from the generated +// service descriptor so a future proto package declaration (which would +// package-qualify the service name) does not silently bypass the auth gate. +var adminMethodPrefix = "/" + pb.Admin_ServiceDesc.ServiceName + "/" + +// AdminTokenAuth builds a gRPC unary+stream interceptor pair enforcing +// "authorization: Bearer " metadata against the supplied token. An +// empty token disables enforcement; callers should pair that mode with a +// --adminInsecureNoAuth flag so operators knowingly opt in. +func AdminTokenAuth(token string) (grpc.UnaryServerInterceptor, grpc.StreamServerInterceptor) { + if token == "" { + return nil, nil + } + expected := []byte(token) + check := func(ctx context.Context) error { + md, ok := metadata.FromIncomingContext(ctx) + if !ok { + return status.Error(codes.Unauthenticated, "missing authorization metadata") + } + values := md.Get("authorization") + if len(values) == 0 { + return status.Error(codes.Unauthenticated, "missing authorization header") + } + got, ok := strings.CutPrefix(values[0], "Bearer ") + if !ok { + return status.Error(codes.Unauthenticated, "authorization is not a bearer token") + } + if subtle.ConstantTimeCompare([]byte(got), expected) != 1 { + return status.Error(codes.Unauthenticated, "invalid admin token") + } + return nil + } + unary := func( + ctx context.Context, + req any, + info *grpc.UnaryServerInfo, + handler grpc.UnaryHandler, + ) (any, error) { + if !strings.HasPrefix(info.FullMethod, adminMethodPrefix) { + return handler(ctx, req) + } + if err := check(ctx); err != nil { + return nil, err + } + return handler(ctx, req) + } + stream := func( + srv any, + ss grpc.ServerStream, + info *grpc.StreamServerInfo, + handler grpc.StreamHandler, + ) error { + if !strings.HasPrefix(info.FullMethod, adminMethodPrefix) { + return handler(srv, ss) + } + if err := check(ss.Context()); err != nil { + return err + } + return handler(srv, ss) + } + return unary, stream +} + +// ErrAdminTokenRequired is returned by NewAdminServer helpers when the operator +// failed to supply a token and also did not opt into insecure mode. +var ErrAdminTokenRequired = errors.New("admin token file required; pass --adminInsecureNoAuth to run without") diff --git a/adapter/admin_grpc_test.go b/adapter/admin_grpc_test.go new file mode 100644 index 00000000..5724165e --- /dev/null +++ b/adapter/admin_grpc_test.go @@ -0,0 +1,632 @@ +package adapter + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/bootjp/elastickv/internal/raftengine" + pb "github.com/bootjp/elastickv/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +type fakeGroup struct { + leaderID string + term uint64 + commit uint64 + applied uint64 + servers []raftengine.Server + cfgErr error +} + +func (f fakeGroup) Status() raftengine.Status { + return raftengine.Status{ + Leader: raftengine.LeaderInfo{ID: f.leaderID}, + Term: f.term, + CommitIndex: f.commit, + AppliedIndex: f.applied, + } +} + +func (f fakeGroup) Configuration(context.Context) (raftengine.Configuration, error) { + if f.cfgErr != nil { + return raftengine.Configuration{}, f.cfgErr + } + return raftengine.Configuration{Servers: append([]raftengine.Server(nil), f.servers...)}, nil +} + +func TestGetClusterOverviewReturnsSelfAndLeaders(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "node-a", GRPCAddress: "127.0.0.1:50051"}, + []NodeIdentity{{NodeID: "node-b", GRPCAddress: "127.0.0.1:50052"}}, + ) + // Populate the live Raft config with both nodes so the bootstrap seed + // for node-b is accepted by mergeSeedMembers (the scale-in fix only + // drops seeds when the live config is authoritatively empty for that + // NodeID — i.e., the node was removed via raft RemoveServer). + servers := []raftengine.Server{ + {ID: "node-a", Address: "127.0.0.1:50051"}, + {ID: "node-b", Address: "127.0.0.1:50052"}, + } + srv.RegisterGroup(1, fakeGroup{leaderID: "node-a", term: 7, servers: servers}) + srv.RegisterGroup(2, fakeGroup{leaderID: "node-b", term: 3, servers: servers}) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatalf("GetClusterOverview: %v", err) + } + if resp.Self.NodeId != "node-a" { + t.Fatalf("self = %q, want node-a", resp.Self.NodeId) + } + if len(resp.Members) != 1 || resp.Members[0].NodeId != "node-b" { + t.Fatalf("members = %v, want [node-b]", resp.Members) + } + if len(resp.GroupLeaders) != 2 { + t.Fatalf("group_leaders count = %d, want 2", len(resp.GroupLeaders)) + } +} + +func TestGetRaftGroupsExposesCommitApplied(t *testing.T) { + t.Parallel() + srv := NewAdminServer(NodeIdentity{NodeID: "n1"}, nil) + srv.RegisterGroup(1, fakeGroupWithContact{leaderID: "n1", term: 2, commit: 99, applied: 97, lastContact: 5 * time.Second}) + + // Freeze the per-server clock so the computed last-contact timestamp is + // deterministic. No package-global state is mutated, so other parallel + // tests cannot race through this seam. + fixed := time.Unix(1_000_000, 0) + srv.SetClock(func() time.Time { return fixed }) + + resp, err := srv.GetRaftGroups(context.Background(), &pb.GetRaftGroupsRequest{}) + if err != nil { + t.Fatalf("GetRaftGroups: %v", err) + } + if len(resp.Groups) != 1 { + t.Fatalf("groups = %d, want 1", len(resp.Groups)) + } + g := resp.Groups[0] + if g.CommitIndex != 99 || g.AppliedIndex != 97 || g.LeaderTerm != 2 { + t.Fatalf("unexpected state %+v", g) + } + wantLastContact := fixed.Add(-5 * time.Second).UnixMilli() + if g.LastContactUnixMs != wantLastContact { + t.Fatalf("LastContactUnixMs = %d, want %d", g.LastContactUnixMs, wantLastContact) + } +} + +// TestGetClusterOverviewUnionsSeedsAndLiveConfig asserts that +// GetClusterOverview picks up a node that was added to a Raft group after the +// admin server was constructed (scale-out). Without Configuration polling, +// the static seed list would miss it entirely. +func TestGetClusterOverviewUnionsSeedsAndLiveConfig(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1", GRPCAddress: "10.0.0.11:50051"}, + []NodeIdentity{{NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}}, + ) + // Group reports a member (n3) that is NOT in the bootstrap seed list. + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.12:50051"}, + {ID: "n3", Address: "10.0.0.13:50051"}, + }, + }) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + ids := make(map[string]string) + for _, m := range resp.Members { + ids[m.NodeId] = m.GrpcAddress + } + // Self (n1) is excluded; both seed (n2) and live-config (n3) must appear. + if len(ids) != 2 { + t.Fatalf("members = %v, want {n2, n3}", ids) + } + if ids["n2"] != "10.0.0.12:50051" || ids["n3"] != "10.0.0.13:50051" { + t.Fatalf("unexpected members %v", ids) + } +} + +// TestGetClusterOverviewDuplicateMemberIDsDeterministic pins the tie-break +// when two Raft groups disagree on a server's address (e.g. mid-readdress, +// before every group has converged): the group with the smallest ID wins and +// the result is stable across calls, so fan-out doesn't flap between stale +// and current addresses. +func TestGetClusterOverviewDuplicateMemberIDsDeterministic(t *testing.T) { + t.Parallel() + srv := NewAdminServer(NodeIdentity{NodeID: "n1"}, nil) + // Group 1 (lower ID): n2 already moved to new address. + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.22:50051"}, + }, + }) + // Group 7 (higher ID): still reports n2 at the stale address. + srv.RegisterGroup(7, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.12:50051"}, + }, + }) + + // Run overview 5 times. All must return the low-ID group's n2 address. + for i := 0; i < 5; i++ { + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Members) != 1 { + t.Fatalf("iter %d: members=%d want 1", i, len(resp.Members)) + } + if resp.Members[0].GrpcAddress != "10.0.0.22:50051" { + t.Fatalf("iter %d: got %s, want low-ID group's n2 @ 10.0.0.22:50051", i, resp.Members[0].GrpcAddress) + } + } +} + +// TestGetClusterOverviewSeedBackfillsBlankLiveAddress asserts that when a +// Raft group reports a server with NodeID set but Address="" (the etcd +// engine emits these mid-membership-update), the seed list still gets to +// backfill that ID instead of being shadowed by a blank live entry. Without +// this, GetClusterOverview would drop the peer from fan-out entirely until +// the live Configuration converged. +func TestGetClusterOverviewSeedBackfillsBlankLiveAddress(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1"}, + []NodeIdentity{{NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}}, + ) + // Live config knows n2 exists but has no address yet. + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: ""}, + }, + }) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Members) != 1 { + t.Fatalf("members = %d, want 1", len(resp.Members)) + } + got := resp.Members[0] + if got.NodeId != "n2" || got.GrpcAddress != "10.0.0.12:50051" { + t.Fatalf("members[0] = %+v, want seed n2 @ 10.0.0.12:50051 (blank live skipped)", got) + } +} + +// TestGetClusterOverviewDropsRemovedSeedAfterScaleIn asserts that a node +// that was removed from the live Raft configuration is also dropped from +// GetClusterOverview, even when it remains in the bootstrap seed list. +// Codex P2 on 14698e8d: previously mergeSeedMembers re-added any seed whose +// NodeID was missing from live config, so a decommissioned peer stayed in +// the overview forever and the admin fan-out kept dialing it. +func TestGetClusterOverviewDropsRemovedSeedAfterScaleIn(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1"}, + // Bootstrap remembered both n2 and n3, but n3 has since been removed. + []NodeIdentity{ + {NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}, + {NodeID: "n3", GRPCAddress: "10.0.0.13:50051"}, + }, + ) + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.12:50051"}, + // n3 absent — was removed via raft RemoveServer. + }, + }) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Members) != 1 || resp.Members[0].NodeId != "n2" { + t.Fatalf("members = %v, want [n2] only (n3 removed via scale-in)", resp.Members) + } +} + +// TestFanoutConfigurationCallsBoundedConcurrency asserts that +// fanoutConfigurationCalls never has more than configFanoutMaxConcurrency +// Configuration goroutines inside the RPC call at once, even when the node +// hosts more groups than the cap. Prevents a goroutine/conn burst on every +// GetClusterOverview when a node carries hundreds of shards. +func TestFanoutConfigurationCallsBoundedConcurrency(t *testing.T) { + t.Parallel() + const numGroups = configFanoutMaxConcurrency * 4 + + var ( + mu sync.Mutex + inFlight int + peakSeen int + release = make(chan struct{}) + ) + groups := make([]groupEntry, 0, numGroups) + for i := 0; i < numGroups; i++ { + + groups = append(groups, groupEntry{ + id: uint64(i + 1), //nolint:gosec // i ranges over a small bounded loop; conversion is safe. + group: testProbeGroup{cb: func() { + mu.Lock() + inFlight++ + if inFlight > peakSeen { + peakSeen = inFlight + } + mu.Unlock() + <-release // hold the call open until released + mu.Lock() + inFlight-- + mu.Unlock() + }}, + }) + } + + done := make(chan struct{}) + go func() { + _ = fanoutConfigurationCalls(t.Context(), groups) + close(done) + }() + + // Give the runtime a beat to schedule goroutines and pump them through + // the semaphore. We don't need a precise wait — peakSeen is a high-water + // mark, so any sampling moment works as long as the calls have stalled. + deadline := time.Now().Add(500 * time.Millisecond) + for time.Now().Before(deadline) { + mu.Lock() + if inFlight >= configFanoutMaxConcurrency { + mu.Unlock() + break + } + mu.Unlock() + time.Sleep(5 * time.Millisecond) + } + + mu.Lock() + got := peakSeen + mu.Unlock() + if got > configFanoutMaxConcurrency { + t.Fatalf("peak in-flight = %d, want ≤%d", got, configFanoutMaxConcurrency) + } + + close(release) // let everything drain + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("fanoutConfigurationCalls did not return after release") + } +} + +// testProbeGroup is a minimal AdminGroup for concurrency tests; it runs cb +// on every Configuration call so the test can observe in-flight count. +type testProbeGroup struct { + cb func() +} + +func (t testProbeGroup) Status() raftengine.Status { return raftengine.Status{} } +func (t testProbeGroup) Configuration(context.Context) (raftengine.Configuration, error) { + if t.cb != nil { + t.cb() + } + return raftengine.Configuration{}, nil +} + +// TestGetClusterOverviewKeepsSeedOnPartialConfigFailure asserts that a +// bootstrap seed is NOT pruned when one group's Configuration succeeds and +// another errors — a node visible only in the failing group would otherwise +// disappear from fan-out for transient reasons. Codex flagged this on +// 94851380: the round-24 fix flipped authoritative on any single success, +// which incorrectly pruned peers under partial failure. The current contract +// requires every queried group to succeed before pruning. +func TestGetClusterOverviewKeepsSeedOnPartialConfigFailure(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1"}, + []NodeIdentity{ + {NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}, + // n3 is only known via the seed list; the failing group below + // is the one that would have reported it. + {NodeID: "n3", GRPCAddress: "10.0.0.13:50051"}, + }, + ) + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.12:50051"}, + // n3 absent from this group. + }, + }) + // Group 7 errors — n3 happens to live in this group's config. + srv.RegisterGroup(7, fakeGroup{leaderID: "n1", cfgErr: context.DeadlineExceeded}) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + ids := map[string]bool{} + for _, m := range resp.Members { + ids[m.NodeId] = true + } + if !ids["n2"] { + t.Fatalf("n2 missing from members %v — live group reported it", resp.Members) + } + if !ids["n3"] { + t.Fatalf("n3 dropped under partial failure: members %v — seeds must fall through when any group errors", resp.Members) + } +} + +// TestGetClusterOverviewLiveConfigWinsOverStaleSeed asserts that when a node +// is readdressed (same NodeID, new GRPCAddress), the live Raft Configuration +// wins over the stale bootstrap seed so fan-out dials the current endpoint. +// Codex P2 on e1f0e532: previously seed was added first and later entries +// with the same ID were ignored, silently pinning the old address. +func TestGetClusterOverviewLiveConfigWinsOverStaleSeed(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1", GRPCAddress: "10.0.0.11:50051"}, + // Bootstrap: n2 lived at 10.0.0.12. + []NodeIdentity{{NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}}, + ) + // Raft config reports n2 moved to 10.0.0.22. + srv.RegisterGroup(1, fakeGroup{ + leaderID: "n1", term: 1, + servers: []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.22:50051"}, + }, + }) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Members) != 1 { + t.Fatalf("members = %d, want 1", len(resp.Members)) + } + got := resp.Members[0] + if got.NodeId != "n2" || got.GrpcAddress != "10.0.0.22:50051" { + t.Fatalf("members[0] = %+v, want n2 @ 10.0.0.22:50051 (live wins over seed)", got) + } +} + +// TestGetClusterOverviewSurvivesConfigurationError asserts that a group that +// errors on Configuration() does NOT fail the RPC — seed members are still +// returned. +func TestGetClusterOverviewSurvivesConfigurationError(t *testing.T) { + t.Parallel() + srv := NewAdminServer( + NodeIdentity{NodeID: "n1"}, + []NodeIdentity{{NodeID: "n2", GRPCAddress: "10.0.0.12:50051"}}, + ) + srv.RegisterGroup(1, fakeGroup{leaderID: "n1", cfgErr: context.DeadlineExceeded}) + + resp, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatalf("overview should not fail on group config error: %v", err) + } + if len(resp.Members) != 1 || resp.Members[0].NodeId != "n2" { + t.Fatalf("unexpected members %v", resp.Members) + } +} + +// TestGetRaftGroupsMapsUnknownLastContactToZero pins the sentinel-negative +// handling for raftengine's "unknown last contact" value (-1). The RPC +// reports 0 (epoch) in that case so the UI renders "unknown" rather than +// "contacted just now". +func TestGetRaftGroupsMapsUnknownLastContactToZero(t *testing.T) { + t.Parallel() + srv := NewAdminServer(NodeIdentity{NodeID: "n1"}, nil) + srv.RegisterGroup(1, fakeGroupWithContact{leaderID: "n1", term: 1, lastContact: -1}) + + fixed := time.Unix(2_000_000, 0) + srv.SetClock(func() time.Time { return fixed }) + + resp, err := srv.GetRaftGroups(context.Background(), &pb.GetRaftGroupsRequest{}) + if err != nil { + t.Fatal(err) + } + if got := resp.Groups[0].LastContactUnixMs; got != 0 { + t.Fatalf("LastContactUnixMs = %d, want 0 (unknown sentinel)", got) + } +} + +type fakeGroupWithContact struct { + leaderID string + term uint64 + commit uint64 + applied uint64 + lastContact time.Duration +} + +func (f fakeGroupWithContact) Status() raftengine.Status { + return raftengine.Status{ + Leader: raftengine.LeaderInfo{ID: f.leaderID}, + Term: f.term, + CommitIndex: f.commit, + AppliedIndex: f.applied, + LastContact: f.lastContact, + } +} + +func (f fakeGroupWithContact) Configuration(context.Context) (raftengine.Configuration, error) { + return raftengine.Configuration{}, nil +} + +// TestGroupOrderingIsStable locks in deterministic ascending-by-RaftGroupId +// ordering so admin UIs and diff-based tests do not see rows jump around. +func TestGroupOrderingIsStable(t *testing.T) { + t.Parallel() + srv := NewAdminServer(NodeIdentity{NodeID: "n1"}, nil) + for _, id := range []uint64{7, 2, 5, 3, 1} { + srv.RegisterGroup(id, fakeGroup{leaderID: "n1"}) + } + + groupsResp, err := srv.GetRaftGroups(context.Background(), &pb.GetRaftGroupsRequest{}) + if err != nil { + t.Fatal(err) + } + gotGroups := make([]uint64, 0, len(groupsResp.Groups)) + for _, g := range groupsResp.Groups { + gotGroups = append(gotGroups, g.RaftGroupId) + } + wantGroups := []uint64{1, 2, 3, 5, 7} + if !equalU64s(gotGroups, wantGroups) { + t.Fatalf("GetRaftGroups order = %v, want %v", gotGroups, wantGroups) + } + + overview, err := srv.GetClusterOverview(context.Background(), &pb.GetClusterOverviewRequest{}) + if err != nil { + t.Fatal(err) + } + gotLeaders := make([]uint64, 0, len(overview.GroupLeaders)) + for _, gl := range overview.GroupLeaders { + gotLeaders = append(gotLeaders, gl.RaftGroupId) + } + if !equalU64s(gotLeaders, wantGroups) { + t.Fatalf("GetClusterOverview leader order = %v, want %v", gotLeaders, wantGroups) + } +} + +func equalU64s(a, b []uint64) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func TestAdminTokenAuth(t *testing.T) { + t.Parallel() + unary, _ := AdminTokenAuth("s3cret") + if unary == nil { + t.Fatal("interceptor should be non-nil for configured token") + } + + info := &grpc.UnaryServerInfo{FullMethod: "/" + pb.Admin_ServiceDesc.ServiceName + "/GetClusterOverview"} + handler := func(_ context.Context, _ any) (any, error) { return "ok", nil } + + cases := []struct { + name string + md metadata.MD + code codes.Code + call bool + }{ + {"missing metadata", nil, codes.Unauthenticated, false}, + {"missing header", metadata.Pairs(), codes.Unauthenticated, false}, + {"wrong scheme", metadata.Pairs("authorization", "Basic zzz"), codes.Unauthenticated, false}, + {"wrong token", metadata.Pairs("authorization", "Bearer nope"), codes.Unauthenticated, false}, + {"correct", metadata.Pairs("authorization", "Bearer s3cret"), codes.OK, true}, + } + for _, tc := range cases { + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ctx := context.Background() + if tc.md != nil { + ctx = metadata.NewIncomingContext(ctx, tc.md) + } + resp, err := unary(ctx, nil, info, handler) + if tc.code == codes.OK { + if err != nil { + t.Fatalf("want OK, got %v", err) + } + if resp != "ok" { + t.Fatalf("handler not called: resp=%v", resp) + } + return + } + if status.Code(err) != tc.code { + t.Fatalf("code = %v, want %v (err=%v)", status.Code(err), tc.code, err) + } + }) + } +} + +func TestAdminTokenAuthSkipsOtherServices(t *testing.T) { + t.Parallel() + unary, _ := AdminTokenAuth("s3cret") + info := &grpc.UnaryServerInfo{FullMethod: "/RawKV/Get"} + handler := func(_ context.Context, _ any) (any, error) { return "ok", nil } + + resp, err := unary(context.Background(), nil, info, handler) + if err != nil { + t.Fatalf("non-admin method should not be gated: %v", err) + } + if resp != "ok" { + t.Fatalf("handler not called: resp=%v", resp) + } +} + +func TestAdminTokenAuthEmptyTokenDisabled(t *testing.T) { + t.Parallel() + unary, stream := AdminTokenAuth("") + if unary != nil || stream != nil { + t.Fatal("empty token should disable interceptors") + } +} + +// hangingGroup never returns from Configuration until ctx fires. Used to +// prove collectLiveMembers stops blocking the merge phase as soon as the +// caller cancels, even if one Configuration call is stuck. +type hangingGroup struct{ fakeGroup } + +func (h hangingGroup) Configuration(ctx context.Context) (raftengine.Configuration, error) { + <-ctx.Done() + return raftengine.Configuration{}, ctx.Err() +} + +// TestCollectLiveMembersHonoursCtxCancel asserts that collectLiveMembers +// returns promptly when ctx is cancelled, even if one Configuration call +// is stuck. Pre-fix, the wg.Wait() inside collectLiveMembers would block +// the merge phase (and the entire GetClusterOverview RPC) on the slowest +// group regardless of ctx state. Post-fix, the merge runs over whatever +// landed before the cancel; the stuck Configuration goroutine unwinds +// asynchronously when its ctx.Done fires. +func TestCollectLiveMembersHonoursCtxCancel(t *testing.T) { + t.Parallel() + + groups := []groupEntry{ + {id: 1, group: hangingGroup{}}, + {id: 2, group: hangingGroup{}}, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + resCh := make(chan liveMembers, 1) + go func() { + resCh <- collectLiveMembers(ctx, groups, "self") + }() + + select { + case r := <-resCh: + // With ctx already cancelled, no live config landed; expect empty maps. + if len(r.addrByID) != 0 || len(r.order) != 0 { + t.Fatalf("expected empty results on early cancel, got addrByID=%v order=%v", r.addrByID, r.order) + } + case <-time.After(1 * time.Second): + t.Fatal("collectLiveMembers blocked past 1s despite cancelled ctx — wg.Wait() regression?") + } +} diff --git a/cmd/elastickv-admin/main.go b/cmd/elastickv-admin/main.go new file mode 100644 index 00000000..ca04e396 --- /dev/null +++ b/cmd/elastickv-admin/main.go @@ -0,0 +1,1024 @@ +// Command elastickv-admin serves the Elastickv admin Web UI described in +// docs/admin_ui_key_visualizer_design.md. Phase 0: token-protected passthrough +// of Admin.GetClusterOverview at /api/cluster/overview, no SPA yet. +package main + +import ( + "bytes" + "context" + "crypto/tls" + "crypto/x509" + "encoding/json" + "flag" + "fmt" + "log" + "net" + "net/http" + "os" + "os/signal" + "strings" + "sync" + "syscall" + "time" + + internalutil "github.com/bootjp/elastickv/internal" + pb "github.com/bootjp/elastickv/proto" + "github.com/cockroachdb/errors" + "golang.org/x/sync/singleflight" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/proto" +) + +const ( + defaultBindAddr = "127.0.0.1:8080" + defaultNodesRefreshInterval = 15 * time.Second + defaultGRPCRequestTimeout = 10 * time.Second + discoveryRPCTimeout = 2 * time.Second + // discoveryWaitBudget is how long a request handler is willing to wait + // for the singleflight membership refresh before falling back to the + // cached (or static seed) list. Kept well below defaultGRPCRequestTimeout + // so a slow discovery cannot starve the subsequent per-node fan-out. + discoveryWaitBudget = 3 * time.Second + // membershipRefreshBudget caps the detached background refresh so it + // cannot run forever even if every seed is slow. Sized for up to a few + // sequential discoveryRPCTimeout attempts before the singleflight + // collapses. + membershipRefreshBudget = 10 * time.Second + readHeaderTimeout = 5 * time.Second + readTimeout = 30 * time.Second + writeTimeout = 30 * time.Second + idleTimeout = 120 * time.Second + shutdownTimeout = 5 * time.Second + maxRequestBodyBytes = 4 << 10 + // maxTokenFileBytes caps the admin-token file so a misconfigured path + // pointing at a huge file (for example a log) cannot force the admin + // process to allocate arbitrary memory before the bearer-token check. + maxTokenFileBytes = 4 << 10 + // defaultMaxDiscoveredNodes is the out-of-the-box cap on the member + // list returned by a peer's GetClusterOverview. The runtime value is + // the per-fanout maxDiscoveredNodes field, configurable via + // --maxDiscoveredNodes; this constant is just the default. + // + // A single /api/cluster/overview fan-out dials every discovered node + // up to this cap; the per-conn cache is sized to match so a healthy + // cluster-wide query reuses connections instead of thrashing the LRU. + defaultMaxDiscoveredNodes = 512 +) + +var ( + bindAddr = flag.String("bindAddr", defaultBindAddr, "HTTP bind address for the admin UI") + nodes = flag.String("nodes", "", "Comma-separated list of elastickv node gRPC addresses") + nodeTokenFile = flag.String("nodeTokenFile", "", "File containing the bearer token sent to nodes' Admin service") + nodesRefreshInterval = flag.Duration("nodesRefreshInterval", defaultNodesRefreshInterval, "Duration to cache cluster membership before re-fetching") + insecureNoAuth = flag.Bool("adminInsecureNoAuth", false, "Skip bearer token authentication; development only") + // Node gRPC is plaintext in Phase 0, so the admin binary defaults to + // plaintext too. TLS is opt-in: set --nodeTLSCACertFile (preferred) or + // --nodeTLSInsecureSkipVerify to switch to TLS. When the cluster turns + // on TLS, operators flip the flag without code changes. + nodeTLSCACertFile = flag.String("nodeTLSCACertFile", "", "PEM file with CA certificates used to verify nodes' gRPC TLS; setting this flag enables TLS dialing") + nodeTLSServerName = flag.String("nodeTLSServerName", "", "Expected TLS server name when connecting to nodes (overrides the address host); only honoured when TLS is enabled") + nodeTLSSkipVerify = flag.Bool("nodeTLSInsecureSkipVerify", false, "Dial nodes with TLS but skip certificate verification; development only. Implies TLS.") + allowRemoteBind = flag.Bool("allowRemoteBind", false, "Allow --bindAddr to listen on a non-loopback interface. The admin UI has no browser-facing auth; set this only when the UI is fronted by an authenticating reverse proxy.") + // --maxDiscoveredNodes bounds both the discovery list returned by a + // peer's GetClusterOverview and the per-conn client cache. Operators + // running clusters larger than the default 512 nodes can raise this; + // values ≤0 fall back to the default to avoid disabling the bound. + maxDiscoveredNodesFlag = flag.Int("maxDiscoveredNodes", defaultMaxDiscoveredNodes, "Maximum number of cluster nodes the admin binary will fan out to (caps both discovery list size and the gRPC client-conn cache)") +) + +func main() { + flag.Parse() + if err := run(); err != nil { + log.Fatal(err) + } +} + +type runConfig struct { + seeds []string + fan *fanout +} + +// initRun consolidates flag parsing and fanout construction so run() stays +// under the project's cyclop budget. +func initRun() (runConfig, error) { + seeds := splitNodes(*nodes) + if len(seeds) == 0 { + return runConfig{}, errors.New("--nodes is required (comma-separated gRPC addresses)") + } + token, err := loadToken(*nodeTokenFile, *insecureNoAuth) + if err != nil { + return runConfig{}, err + } + if err := validateBindAddr(*bindAddr, *allowRemoteBind); err != nil { + return runConfig{}, err + } + creds, err := loadTransportCredentials(*nodeTLSCACertFile, *nodeTLSServerName, *nodeTLSSkipVerify) + if err != nil { + return runConfig{}, err + } + fan := newFanout(seeds, token, *nodesRefreshInterval, creds, *maxDiscoveredNodesFlag) + return runConfig{seeds: seeds, fan: fan}, nil +} + +// buildMux wires the Phase 0 HTTP surface. Lives outside run() both for +// testability and to keep run() under the cyclop budget. +func buildMux(fan *fanout) *http.ServeMux { + mux := http.NewServeMux() + mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + mux.HandleFunc("/api/cluster/overview", fan.handleOverview) + mux.HandleFunc("/api/", func(w http.ResponseWriter, _ *http.Request) { + writeJSONError(w, http.StatusServiceUnavailable, "endpoint not implemented in phase 0") + }) + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + _, _ = w.Write([]byte("elastickv-admin: phase 0 — SPA not yet embedded\n")) + }) + return mux +} + +func run() error { + cfg, err := initRun() + if err != nil { + return err + } + defer cfg.fan.Close() + + srv := &http.Server{ + Addr: *bindAddr, + Handler: buildMux(cfg.fan), + ReadHeaderTimeout: readHeaderTimeout, + ReadTimeout: readTimeout, + WriteTimeout: writeTimeout, + IdleTimeout: idleTimeout, + } + + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer cancel() + + errCh := make(chan error, 1) + go func() { + log.Printf("elastickv-admin listening on %s (seeds=%v)", *bindAddr, cfg.seeds) + if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + errCh <- err + return + } + errCh <- nil + }() + + select { + case <-ctx.Done(): + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer shutdownCancel() + if err := srv.Shutdown(shutdownCtx); err != nil { + return errors.Wrap(err, "shutdown") + } + return nil + case err := <-errCh: + return err + } +} + +// validateBindAddr rejects a non-loopback bind unless the operator has +// explicitly opted into --allowRemoteBind. The admin binary performs no +// browser-side authentication in Phase 0 while holding a privileged node +// admin token, so a misconfigured 0.0.0.0:8080 would expose that token-gated +// cluster view to anyone on the network. +func validateBindAddr(addr string, allow bool) error { + if allow { + return nil + } + host, _, err := net.SplitHostPort(addr) + if err != nil { + return errors.Wrapf(err, "invalid --bindAddr %q", addr) + } + host = strings.TrimSpace(host) + if host == "" { + return fmt.Errorf("--bindAddr %q has an empty host; pass an explicit loopback host like 127.0.0.1 or set --allowRemoteBind when fronted by an auth proxy", addr) + } + ip := net.ParseIP(host) + switch { + case host == "localhost": + return nil + case ip != nil && ip.IsLoopback(): + return nil + } + return fmt.Errorf("--bindAddr %q is not loopback; set --allowRemoteBind to expose the admin UI remotely (the UI has no browser-side auth — do so only behind an auth proxy)", addr) +} + +func splitNodes(raw string) []string { + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} + +func loadToken(path string, insecureMode bool) (string, error) { + if path == "" { + if insecureMode { + return "", nil + } + return "", errors.New("--nodeTokenFile is required; pass --adminInsecureNoAuth for insecure dev mode") + } + if insecureMode { + return "", errors.New("--adminInsecureNoAuth and --nodeTokenFile are mutually exclusive") + } + tok, err := internalutil.LoadBearerTokenFile(path, maxTokenFileBytes, "admin token") + if err != nil { + return "", errors.Wrap(err, "load admin token") + } + return tok, nil +} + +// loadTransportCredentials builds the gRPC TransportCredentials used to dial +// nodes. Phase 0 nodes expose a plaintext gRPC server, so the default is +// insecure credentials — if neither --nodeTLSCACertFile nor +// --nodeTLSInsecureSkipVerify is set, the admin binary dials plaintext. +// Passing either flag opts into TLS; --nodeTLSServerName is honoured only +// alongside a TLS opt-in. +func loadTransportCredentials( + caFile, serverName string, + skipVerify bool, +) (credentials.TransportCredentials, error) { + tlsRequested := caFile != "" || skipVerify + if !tlsRequested { + if serverName != "" { + return nil, errors.New("--nodeTLSServerName requires TLS; set --nodeTLSCACertFile or --nodeTLSInsecureSkipVerify") + } + return insecure.NewCredentials(), nil + } + if caFile != "" && skipVerify { + return nil, errors.New("--nodeTLSCACertFile and --nodeTLSInsecureSkipVerify are mutually exclusive") + } + cfg := &tls.Config{ + MinVersion: tls.VersionTLS12, + ServerName: serverName, + InsecureSkipVerify: skipVerify, //nolint:gosec // gated behind --nodeTLSInsecureSkipVerify; dev-only. + } + if caFile != "" { + pem, err := os.ReadFile(caFile) + if err != nil { + return nil, errors.Wrap(err, "read node TLS CA file") + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(pem) { + return nil, errors.New("no certificates parsed from --nodeTLSCACertFile") + } + cfg.RootCAs = pool + } + return credentials.NewTLS(cfg), nil +} + +type nodeClient struct { + addr string + conn *grpc.ClientConn + client pb.AdminClient + + // refcount, evicted, and closed are protected by fanout.mu. They let the + // cache evict entries while RPCs are in flight: eviction removes the + // entry from the map and marks it evicted, and the conn is closed only + // once the last borrower calls release. closed guards against a second + // release on an already-closed client so the public contract (extra + // release() calls are no-ops) holds even when refcount transiently + // bounces back to zero. + refcount int + evicted bool + closed bool +} + +type membership struct { + addrs []string + fetchedAt time.Time +} + +type fanout struct { + seeds []string + // seedSet is a pre-computed lookup over seeds for evictOneLocked's + // "skip seed entries" check. Seeds are immutable after construction so + // rebuilding the map on every cache-full eviction (under f.mu) is pure + // waste — Gemini flagged the per-call allocation. + seedSet map[string]struct{} + // maxNodes bounds both the per-overview discovery list and the gRPC + // client cache. Configurable via --maxDiscoveredNodes; values ≤0 fall + // back to defaultMaxDiscoveredNodes so the bound is never disabled. + maxNodes int + token string + refreshInterval time.Duration + creds credentials.TransportCredentials + + mu sync.Mutex + clients map[string]*nodeClient + members *membership + closed bool + + // refreshGroup deduplicates concurrent membership refresh RPCs so a burst + // of browser requests immediately after cache expiry collapses into a + // single GetClusterOverview call against one seed. + refreshGroup singleflight.Group + + // dialGroup deduplicates concurrent grpc.NewClient calls for the same + // address. Without it, N goroutines that all miss the cache for the + // same addr would each run a parallel dial (DNS/parsing/setup); only + // one is kept. With singleflight, only one dial runs and every waiter + // gets the same *grpc.ClientConn — refcount is bumped per waiter + // before they each return. + dialGroup singleflight.Group +} + +// errFanoutClosed is returned by clientFor when Close has already run, so +// callers can treat it as a graceful shutdown signal instead of bubbling up as +// a generic map-panic. +var errFanoutClosed = errors.New("admin fanout is closed") + +func newFanout( + seeds []string, + token string, + refreshInterval time.Duration, + creds credentials.TransportCredentials, + maxNodes int, +) *fanout { + if refreshInterval <= 0 { + refreshInterval = defaultNodesRefreshInterval + } + if creds == nil { + creds = insecure.NewCredentials() + } + if maxNodes <= 0 { + maxNodes = defaultMaxDiscoveredNodes + } + seedSet := make(map[string]struct{}, len(seeds)) + for _, s := range seeds { + seedSet[s] = struct{}{} + } + return &fanout{ + seeds: seeds, + seedSet: seedSet, + maxNodes: maxNodes, + token: token, + refreshInterval: refreshInterval, + creds: creds, + clients: make(map[string]*nodeClient), + } +} + +func (f *fanout) Close() { + f.mu.Lock() + if f.closed { + f.mu.Unlock() + return + } + f.closed = true + // Shutdown is an intentional cancellation of any in-flight RPCs; close + // connections eagerly and let borrowers see the cancel. Borrowers that + // still hold leases will observe the conn as closed on their next call. + // Mark each client closed inside the lock so the deferred release path + // does not attempt a double-close, then collect the *grpc.ClientConn + // references and run conn.Close() outside the lock — Close() can do + // network I/O and waits for the gRPC client transport to drain, which + // would block any concurrent clientFor / invalidateClient / RPC waiting + // on f.mu for the entire shutdown window. + conns := make([]struct { + addr string + conn *grpc.ClientConn + }, 0, len(f.clients)) + for _, c := range f.clients { + if c.closed { + continue + } + c.closed = true + conns = append(conns, struct { + addr string + conn *grpc.ClientConn + }{addr: c.addr, conn: c.conn}) + } + // Replace with an empty map rather than nil so the remaining + // closed-guarded accessors can still iterate or lookup without panicking + // while still releasing the client references for GC. + f.clients = map[string]*nodeClient{} + f.mu.Unlock() + + for _, e := range conns { + if err := e.conn.Close(); err != nil { + log.Printf("elastickv-admin: close gRPC connection to %s: %v", e.addr, err) + } + } +} + +// clientFor returns a leased nodeClient that callers must release once they +// finish the RPC (release is the second return value, always non-nil and safe +// to call). The cache is bounded by maxCachedClients; if the cache is full, +// one entry is evicted — prefer non-seed victims, fall back to any entry when +// the cache is saturated with seeds. Evicted entries stop accepting new leases +// but their underlying *grpc.ClientConn is kept alive until every outstanding +// borrower has released; this prevents an eviction from canceling a healthy +// concurrent GetClusterOverview. +// +// The dial step (grpc.NewClient) runs outside f.mu through a singleflight +// keyed by addr — concurrent dials for the same addr collapse into one, +// avoiding wasted DNS/parsing work plus the post-dial close-the-loser +// dance. NewClient itself is non-blocking but parses the target and may +// trigger synchronous DNS depending on resolver config, so holding the +// global mutex for that wall-clock time would serialize concurrent +// clientFor calls for distinct addrs. +func (f *fanout) clientFor(addr string) (*nodeClient, func(), error) { + if c, release, err, ok := f.cacheLookup(addr); ok { + return c, release, err + } + conn, err := f.dialDeduped(addr) + if err != nil { + return nil, func() {}, err + } + return f.installOrAttach(addr, conn) +} + +// cacheLookup returns (client, release, err, true) when either the cache hit +// or the fanout-closed branch fires; the caller can short-circuit. Returns +// (_,_,_,false) when the caller still needs to dial. +func (f *fanout) cacheLookup(addr string) (*nodeClient, func(), error, bool) { + f.mu.Lock() + if f.closed { + f.mu.Unlock() + return nil, func() {}, errFanoutClosed, true + } + if c, ok := f.clients[addr]; ok { + c.refcount++ + release := f.releaseFunc(c) + f.mu.Unlock() + return c, release, nil, true + } + f.mu.Unlock() + return nil, nil, nil, false +} + +// dialDeduped runs grpc.NewClient inside the dialGroup singleflight so +// concurrent first-time dials for addr collapse to one conn. +func (f *fanout) dialDeduped(addr string) (*grpc.ClientConn, error) { + v, err, _ := f.dialGroup.Do(addr, func() (any, error) { + return grpc.NewClient( + addr, + grpc.WithTransportCredentials(f.creds), + internalutil.GRPCCallOptions(), + ) + }) + if err != nil { + return nil, errors.Wrapf(err, "dial %s", addr) + } + conn, ok := v.(*grpc.ClientConn) + if !ok { + return nil, fmt.Errorf("dial %s: unexpected singleflight value type %T", addr, v) + } + return conn, nil +} + +// installOrAttach installs the just-dialed conn into the cache or, if a +// concurrent waiter beat us to it, takes a lease on the existing entry and +// closes the orphaned conn (when its pointer differs from the cached entry). +func (f *fanout) installOrAttach(addr string, conn *grpc.ClientConn) (*nodeClient, func(), error) { + f.mu.Lock() + if f.closed { + f.mu.Unlock() + if err := conn.Close(); err != nil { + log.Printf("elastickv-admin: close orphaned dial for %s after shutdown: %v", addr, err) + } + return nil, func() {}, errFanoutClosed + } + // If another waiter already installed a cache entry, take a lease on it. + // Two cases: (a) singleflight collapsed concurrent dials so the cached + // entry's conn IS this conn (same pointer) — must NOT Close it because + // the cache holds the only reference; (b) a non-concurrent earlier dial + // installed a different conn before our Do call — our just-dialed conn + // is orphaned and must be closed to avoid leaking fds/goroutines. + if c, ok := f.clients[addr]; ok { + c.refcount++ + release := f.releaseFunc(c) + shouldClose := c.conn != conn + f.mu.Unlock() + if shouldClose { + if err := conn.Close(); err != nil { + log.Printf("elastickv-admin: close orphaned dial for %s: %v", addr, err) + } + } + return c, release, nil + } + var evicted *grpc.ClientConn + if len(f.clients) >= f.maxNodes { + evicted = f.evictOneLocked() + } + c := &nodeClient{addr: addr, conn: conn, client: pb.NewAdminClient(conn), refcount: 1} + f.clients[addr] = c + release := f.releaseFunc(c) + f.mu.Unlock() + if evicted != nil { + if err := evicted.Close(); err != nil { + log.Printf("elastickv-admin: evict-close: %v", err) + } + } + return c, release, nil +} + +// releaseFunc returns the closer used to drop a lease. On the last release +// of an evicted client the underlying connection is finally closed; that +// Close() runs after f.mu is dropped because grpc.ClientConn.Close can do +// network I/O and waits for the transport to drain — holding the global +// fanout mutex across that would block any concurrent clientFor / +// invalidateClient / RPC waiting on f.mu. Extra release() calls after the +// conn is already closed are safe no-ops. +func (f *fanout) releaseFunc(c *nodeClient) func() { + return func() { + f.mu.Lock() + if c.refcount > 0 { + c.refcount-- + } + var toClose *grpc.ClientConn + if c.refcount == 0 && c.evicted && !c.closed { + c.closed = true + toClose = c.conn + } + f.mu.Unlock() + + if toClose == nil { + return + } + if err := toClose.Close(); err != nil { + log.Printf("elastickv-admin: deferred close for %s: %v", c.addr, err) + } + } +} + +// evictOneLocked removes exactly one entry from f.clients. Prefers non-seed +// entries; falls back to any entry if none are eligible (for example when +// len(seeds) >= maxCachedClients). Returns the *grpc.ClientConn that needs +// closing (or nil if the entry has outstanding leases or was already +// closed) — caller must run Close() outside f.mu. Closing is deferred to +// the last release (see releaseFunc) when leases are still held. +func (f *fanout) evictOneLocked() *grpc.ClientConn { + var fallback string + var fallbackClient *nodeClient + for victim, vc := range f.clients { + if fallback == "" { + fallback, fallbackClient = victim, vc + } + if _, keep := f.seedSet[victim]; keep { + continue + } + return f.retireLocked(victim, vc) + } + if fallbackClient != nil { + return f.retireLocked(fallback, fallbackClient) + } + return nil +} + +// retireLocked removes a client from the cache and, if no lease is currently +// held, marks it for closing. Returns the connection that needs to be closed +// (or nil) so the caller can run conn.Close() outside f.mu — Close() blocks +// on transport teardown and must not run with the global fanout mutex held. +// Otherwise the connection stays open until the last borrower releases, so +// an evicted entry never cancels an in-flight RPC. Idempotent — double-retiring +// or retiring after the last release is a no-op. Caller must hold f.mu. +func (f *fanout) retireLocked(addr string, c *nodeClient) *grpc.ClientConn { + delete(f.clients, addr) + if c.evicted { + return nil + } + c.evicted = true + if c.refcount > 0 || c.closed { + return nil + } + c.closed = true + return c.conn +} + +// invalidateClient drops a cached connection — used when a peer returns +// Unavailable so the next request re-dials or skips the removed node. The +// connection stays open until the last borrower releases, so invalidating +// does not cancel other goroutines' in-flight RPCs. +func (f *fanout) invalidateClient(addr string) { + f.mu.Lock() + if f.closed { + f.mu.Unlock() + return + } + f.members = nil + var toClose *grpc.ClientConn + if c, ok := f.clients[addr]; ok { + toClose = f.retireLocked(addr, c) + } + f.mu.Unlock() + + if toClose != nil { + if err := toClose.Close(); err != nil { + log.Printf("elastickv-admin: invalidate %s: close: %v", addr, err) + } + } +} + +func (f *fanout) outgoingCtx(parent context.Context) context.Context { + if f.token == "" { + return parent + } + return metadata.AppendToOutgoingContext(parent, "authorization", "Bearer "+f.token) +} + +// currentTargets returns the list of node addresses to fan out to. If the +// membership cache is fresh it is returned directly; otherwise the admin binary +// queries seeds via GetClusterOverview and caches the resulting member list +// for refreshInterval. Concurrent refreshes are collapsed through singleflight +// so a burst of requests after cache expiry hits only one seed. The shared +// refresh runs on a detached background context bounded by +// membershipRefreshBudget so one caller canceling (e.g., browser tab close) +// does not abort the work for every other concurrent waiter. On total failure +// the admin binary falls back to the static seed list so a single unreachable +// seed does not take the admin offline. +func (f *fanout) currentTargets(ctx context.Context) []string { + f.mu.Lock() + if f.members != nil && time.Since(f.members.fetchedAt) < f.refreshInterval { + addrs := append([]string(nil), f.members.addrs...) + f.mu.Unlock() + return addrs + } + f.mu.Unlock() + + ch := f.refreshGroup.DoChan("members", func() (any, error) { + bgCtx, cancel := context.WithTimeout(context.Background(), membershipRefreshBudget) + defer cancel() + return f.refreshMembership(bgCtx), nil + }) + select { + case r := <-ch: + // refreshMembership always returns a []string today, but explicitly + // check the assertion so a future return-type change turns into a + // loud, non-panicking fallback to seeds instead of a silent crash. + if addrs, ok := r.Val.([]string); ok { + return addrs + } + log.Printf("elastickv-admin: membership refresh returned unexpected type %T; falling back to seeds", r.Val) + return f.seedTargets() + case <-ctx.Done(): + // Caller bailed. Give them whatever targets we can assemble without + // blocking: the last cached membership if we have one, else seeds. + // The detached refresh continues in the background and will populate + // the cache for the next request. + f.mu.Lock() + defer f.mu.Unlock() + if f.members != nil { + return append([]string(nil), f.members.addrs...) + } + return f.seedTargets() + } +} + +// seedTargets returns a deduplicated copy of f.seeds clamped to f.maxNodes. +// Callers use it on seed-fallback paths (discovery error, ctx cancel, +// unexpected refresh result, no cached members yet) so a misconfigured huge +// --nodes list never bypasses the fan-out bound that membersFrom otherwise +// enforces. Codex P2 on 501b0173: previously these paths returned the raw +// f.seeds, which under outages or oversized seed lists could spawn more +// concurrent RPCs than configured. +func (f *fanout) seedTargets() []string { + cap := f.maxNodes + if cap <= 0 { + cap = defaultMaxDiscoveredNodes + } + if len(f.seeds) < cap { + cap = len(f.seeds) + } + out := make([]string, 0, cap) + seen := make(map[string]struct{}, cap) + for _, s := range f.seeds { + if _, dup := seen[s]; dup { + continue + } + if len(out) >= f.maxNodes && f.maxNodes > 0 { + break + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + +// refreshMembership performs the actual discovery RPC. It honours the caller's +// context for overall cancellation but derives a short per-seed timeout from +// discoveryRPCTimeout so a slow first seed does not stall the whole request. +func (f *fanout) refreshMembership(ctx context.Context) []string { + for _, seed := range f.seeds { + cli, release, err := f.clientFor(seed) + if err != nil { + log.Printf("elastickv-admin: dial seed %s: %v", seed, err) + continue + } + rpcCtx, cancel := context.WithTimeout(ctx, discoveryRPCTimeout) + resp, err := cli.client.GetClusterOverview(f.outgoingCtx(rpcCtx), &pb.GetClusterOverviewRequest{}) + cancel() + release() + if err != nil { + if status.Code(err) == codes.Unavailable { + f.invalidateClient(seed) + } + log.Printf("elastickv-admin: discover membership via %s: %v", seed, err) + continue + } + addrs := membersFrom(seed, resp, f.maxNodes) + f.mu.Lock() + f.members = &membership{addrs: addrs, fetchedAt: time.Now()} + f.mu.Unlock() + return append([]string(nil), addrs...) + } + + log.Printf("elastickv-admin: all seeds unreachable for membership refresh; falling back to static seed list") + return f.seedTargets() +} + +// membersFrom extracts a deduplicated address list from a cluster overview +// response, always including the node that answered so the answering seed is +// still queried even if it omits itself from members. The result is capped at +// maxDiscoveredNodes so a malicious or misconfigured peer cannot inflate the +// fan-out. +// +// Deduplication keys on NodeID when available, falling back to the raw +// grpc_address otherwise. This prevents a fan-out from querying the same +// node twice when the seed address (e.g. "localhost:50051") and the node's +// self-advertised address (e.g. "127.0.0.1:50051") are different aliases +// for the same process — Codex flagged that the previous address-only +// dedup distorted overview results in that case. +// +// Initial slice capacity is bounded by maxDiscoveredNodes (rather than +// len(members)+1) so a misbehaving peer that returns 10× the cap does not +// force a giant allocation just to truncate immediately afterward. +func membersFrom(seed string, resp *pb.GetClusterOverviewResponse, maxNodes int) []string { + if maxNodes <= 0 { + maxNodes = defaultMaxDiscoveredNodes + } + acc := newDiscoveryAccumulator(len(resp.GetMembers())+1, maxNodes) + + // Add the seed under the responding node's ID so a later entry for that + // same NodeID (most likely resp.Self.GrpcAddress, an alias of the seed) + // is deduped instead of producing a duplicate fan-out target. + self := resp.GetSelf() + var selfID string + if self != nil { + selfID = self.GetNodeId() + } + acc.add(selfID, seed) + + // When the response advertises a different self.GrpcAddress, only add it + // when we have no NodeID to anchor the seed to (legacy nodes); otherwise + // the dedup above already covers it. + if self != nil && selfID == "" { + acc.add("", self.GetGrpcAddress()) + } + + for _, m := range resp.GetMembers() { + acc.add(m.GetNodeId(), m.GetGrpcAddress()) + } + if acc.truncated { + log.Printf("elastickv-admin: discovery response exceeded %d nodes; truncating (peer=%s)", maxNodes, seed) + } + return acc.out +} + +// discoveryAccumulator dedups (NodeID, address) pairs while building the +// fan-out target list. Extracted from membersFrom so the surrounding loop +// stays under the cyclop budget. The cap is per-instance (not a package +// constant) so operators can raise it via --maxDiscoveredNodes for very +// large clusters. +type discoveryAccumulator struct { + out []string + seenAddr map[string]struct{} + seenID map[string]struct{} + maxNodes int + truncated bool +} + +func newDiscoveryAccumulator(suggestedCap, maxNodes int) *discoveryAccumulator { + if maxNodes <= 0 { + maxNodes = defaultMaxDiscoveredNodes + } + if suggestedCap > maxNodes { + suggestedCap = maxNodes + } + return &discoveryAccumulator{ + out: make([]string, 0, suggestedCap), + seenAddr: map[string]struct{}{}, + seenID: map[string]struct{}{}, + maxNodes: maxNodes, + } +} + +// add records a fan-out target keyed by its NodeID (when known) and address. +// Returns silently when the entry is empty, a duplicate, or would push the +// list past maxNodes; the caller can read truncated to log a truncation +// event once. +func (a *discoveryAccumulator) add(id, addr string) { + addr = strings.TrimSpace(addr) + if addr == "" { + return + } + if _, dup := a.seenAddr[addr]; dup { + return + } + if id != "" { + if _, dup := a.seenID[id]; dup { + return + } + } + if len(a.out) >= a.maxNodes { + a.truncated = true + return + } + a.seenAddr[addr] = struct{}{} + if id != "" { + a.seenID[id] = struct{}{} + } + a.out = append(a.out, addr) +} + +// perNodeResult wraps a fan-out response from one node. Data is stored as +// json.RawMessage so it can be filled with a protojson-encoded protobuf +// message — encoding/json would lose the proto3 field-name mapping and +// well-known-type handling. +type perNodeResult struct { + Node string `json:"node"` + OK bool `json:"ok"` + Error string `json:"error,omitempty"` + Data json.RawMessage `json:"data,omitempty"` +} + +// marshalProto encodes a protobuf message with the JSON mapping that preserves +// proto3 field names and well-known-type semantics. +var protoMarshaler = protojson.MarshalOptions{EmitUnpopulated: true, UseProtoNames: false} + +func marshalProto(m proto.Message) (json.RawMessage, error) { + raw, err := protoMarshaler.Marshal(m) + if err != nil { + return nil, errors.Wrap(err, "protojson marshal") + } + return raw, nil +} + +func (f *fanout) handleOverview(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + writeJSONError(w, http.StatusMethodNotAllowed, "method not allowed") + return + } + r.Body = http.MaxBytesReader(w, r.Body, maxRequestBodyBytes) + + // Split the discovery and per-node fan-out budgets. Reusing one ctx for + // both lets a slow membership refresh consume the entire deadline and + // leave the fan-out with an already-canceled context, so separate them. + discoveryCtx, discoveryCancel := context.WithTimeout(r.Context(), discoveryWaitBudget) + targets := f.currentTargets(discoveryCtx) + discoveryCancel() + + ctx, cancel := context.WithTimeout(r.Context(), defaultGRPCRequestTimeout) + defer cancel() + results := make([]perNodeResult, len(targets)) + var wg sync.WaitGroup + for i, addr := range targets { + wg.Add(1) + go func(i int, addr string) { + defer wg.Done() + entry := perNodeResult{Node: addr} + cli, release, err := f.clientFor(addr) + if err != nil { + entry.Error = err.Error() + results[i] = entry + return + } + defer release() + resp, err := cli.client.GetClusterOverview(f.outgoingCtx(ctx), &pb.GetClusterOverviewRequest{}) + if err != nil { + if status.Code(err) == codes.Unavailable { + f.invalidateClient(addr) + } + entry.Error = err.Error() + results[i] = entry + return + } + data, mErr := marshalProto(resp) + if mErr != nil { + entry.Error = errors.Wrap(mErr, "marshal response").Error() + results[i] = entry + return + } + entry.OK = true + entry.Data = data + results[i] = entry + }(i, addr) + } + wg.Wait() + + writeJSON(w, http.StatusOK, map[string]any{"nodes": results}) +} + +// maxResponseBodyBytes caps writeJSON's encode buffer. Worst-case sizing: +// fan-out hits at most maxDiscoveredNodes (=512) nodes, each returning a +// GetClusterOverview proto. The proto is dominated by the members list +// (≤maxDiscoveredNodes entries × ~few-hundred bytes each) plus the group +// leaders map (one entry per Raft group; clusters carry tens, not hundreds), +// so the per-node JSON is bounded around ~150 KiB and the aggregated body is +// bounded around 75 MiB even before deduplication. The 128 MiB cap below +// comfortably covers that worst case while still rejecting clearly +// oversized payloads; operators running clusters where the overview +// legitimately exceeds this can raise the constant. Keep this aligned with +// handleOverview's fan-out cap so a misbehaving node cannot force unbounded +// memory growth. +const maxResponseBodyBytes = 128 << 20 + +// writeJSONBufferPool reuses encode buffers across requests so a steady stream +// of /api/* calls doesn't churn the heap with per-request allocations. The +// pool stores *bytes.Buffer; each user resets and bounds the buffer. +var writeJSONBufferPool = sync.Pool{ + New: func() any { return new(bytes.Buffer) }, +} + +// writeJSON marshals body into a pooled, size-capped buffer first, so an +// encoding failure can still surface as a 500 instead of a truncated body +// under a committed 2xx header. The cap (maxResponseBodyBytes) bounds memory +// even if a misbehaving downstream returns an oversized payload. +func writeJSON(w http.ResponseWriter, code int, body any) { + buf, ok := writeJSONBufferPool.Get().(*bytes.Buffer) + if !ok { + buf = new(bytes.Buffer) + } + defer func() { + // Drop very large buffers rather than retaining them in the pool — + // keeps steady-state memory close to the typical response size. + const maxRetainBytes = 1 << 20 + if buf.Cap() > maxRetainBytes { + return + } + buf.Reset() + writeJSONBufferPool.Put(buf) + }() + buf.Reset() + + limited := &cappedWriter{w: buf, max: maxResponseBodyBytes} + if err := json.NewEncoder(limited).Encode(body); err != nil || limited.exceeded { + if limited.exceeded { + log.Printf("elastickv-admin: response exceeded %d-byte cap; returning 500", maxResponseBodyBytes) + } else { + log.Printf("elastickv-admin: encode JSON response: %v", err) + } + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.WriteHeader(http.StatusInternalServerError) + const fallback = `{"code":500,"message":"internal server error"}` + "\n" + if _, werr := w.Write([]byte(fallback)); werr != nil { + log.Printf("elastickv-admin: write fallback response: %v", werr) + } + return + } + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.WriteHeader(code) + if _, err := w.Write(buf.Bytes()); err != nil { + log.Printf("elastickv-admin: write JSON response: %v", err) + } +} + +// cappedWriter wraps an io.Writer and refuses writes once `written` would +// exceed `max`. Used by writeJSON so json.Encoder stops streaming bytes into +// the buffer past the cap; the encoder reports the short-write and writeJSON +// returns a 500 instead of an oversized body. +type cappedWriter struct { + w *bytes.Buffer + max int + written int + exceeded bool +} + +func (c *cappedWriter) Write(p []byte) (int, error) { + if c.exceeded { + return 0, errors.New("response body cap exceeded") + } + if c.written+len(p) > c.max { + c.exceeded = true + return 0, fmt.Errorf("response body would exceed %d bytes", c.max) + } + n, err := c.w.Write(p) + c.written += n + if err != nil { + return n, errors.Wrap(err, "buffer write") + } + return n, nil +} + +func writeJSONError(w http.ResponseWriter, code int, msg string) { + writeJSON(w, code, map[string]any{"code": code, "message": msg}) +} diff --git a/cmd/elastickv-admin/main_test.go b/cmd/elastickv-admin/main_test.go new file mode 100644 index 00000000..c52e3107 --- /dev/null +++ b/cmd/elastickv-admin/main_test.go @@ -0,0 +1,903 @@ +package main + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/json" + "encoding/pem" + "math" + "math/big" + "net" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + pb "github.com/bootjp/elastickv/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/status" +) + +func TestValidateBindAddr(t *testing.T) { + t.Parallel() + cases := []struct { + name string + addr string + allow bool + wantErr bool + }{ + {"loopback ipv4", "127.0.0.1:8080", false, false}, + {"loopback ipv6", "[::1]:8080", false, false}, + {"localhost", "localhost:8080", false, false}, + {"remote bind default rejected", "0.0.0.0:8080", false, true}, + {"specific ip default rejected", "10.0.0.5:8080", false, true}, + {"empty host rejected", ":8080", false, true}, + {"allow opt-in permits remote", "0.0.0.0:8080", true, false}, + {"malformed addr", "not-an-addr", false, true}, + } + for _, tc := range cases { + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + err := validateBindAddr(tc.addr, tc.allow) + if tc.wantErr && err == nil { + t.Fatalf("want error, got nil") + } + if !tc.wantErr && err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + } +} + +func TestSplitNodesTrimsAndDrops(t *testing.T) { + t.Parallel() + got := splitNodes(" host-a:50051 ,,host-b:50051 ,") + want := []string{"host-a:50051", "host-b:50051"} + if len(got) != len(want) { + t.Fatalf("len = %d, want %d (%v)", len(got), len(want), got) + } + for i, w := range want { + if got[i] != w { + t.Fatalf("[%d] = %q, want %q", i, got[i], w) + } + } +} + +func TestLoadTokenRequiresFileOrInsecure(t *testing.T) { + t.Parallel() + if _, err := loadToken("", false); err == nil { + t.Fatal("expected error when neither token nor insecure mode supplied") + } + tok, err := loadToken("", true) + if err != nil { + t.Fatalf("insecure-mode empty path should succeed: %v", err) + } + if tok != "" { + t.Fatalf("insecure-mode token = %q, want empty", tok) + } +} + +func TestLoadTokenReadsAndTrims(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "token") + if err := os.WriteFile(path, []byte("\n s3cret \n"), 0o600); err != nil { + t.Fatal(err) + } + tok, err := loadToken(path, false) + if err != nil { + t.Fatalf("loadToken: %v", err) + } + if tok != "s3cret" { + t.Fatalf("tok = %q, want s3cret", tok) + } +} + +func TestLoadTokenRejectsEmptyFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "empty") + if err := os.WriteFile(path, []byte(" \n"), 0o600); err != nil { + t.Fatal(err) + } + _, err := loadToken(path, false) + if err == nil || !strings.Contains(err.Error(), "empty") { + t.Fatalf("expected empty-file error, got %v", err) + } +} + +func TestLoadTokenRejectsInsecureWithFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "tok") + if err := os.WriteFile(path, []byte("x"), 0o600); err != nil { + t.Fatal(err) + } + if _, err := loadToken(path, true); err == nil { + t.Fatal("expected mutual-exclusion error when both supplied") + } +} + +func TestLoadTransportCredentialsPlaintextDefault(t *testing.T) { + t.Parallel() + if _, err := loadTransportCredentials("", "", false); err != nil { + t.Fatalf("no-flags default should succeed: %v", err) + } + if _, err := loadTransportCredentials("", "node-1", false); err == nil { + t.Fatal("serverName without TLS opt-in should error") + } +} + +func TestLoadTransportCredentialsTLS(t *testing.T) { + t.Parallel() + dir := t.TempDir() + ca := filepath.Join(dir, "ca.pem") + if err := os.WriteFile(ca, writePEMCert(t), 0o600); err != nil { + t.Fatal(err) + } + if _, err := loadTransportCredentials(ca, "", true); err == nil { + t.Fatal("CA file + skip-verify should error (mutually exclusive)") + } + creds, err := loadTransportCredentials(ca, "node-1", false) + if err != nil { + t.Fatalf("valid CA config failed: %v", err) + } + if creds == nil { + t.Fatal("expected TLS creds") + } + creds, err = loadTransportCredentials("", "", true) + if err != nil { + t.Fatalf("skip-verify alone should succeed: %v", err) + } + if creds == nil { + t.Fatal("expected TLS creds for skip-verify") + } +} + +func TestLoadTransportCredentialsRejectsBadCA(t *testing.T) { + t.Parallel() + dir := t.TempDir() + bad := filepath.Join(dir, "bad.pem") + if err := os.WriteFile(bad, []byte("not a cert"), 0o600); err != nil { + t.Fatal(err) + } + if _, err := loadTransportCredentials(bad, "", false); err == nil { + t.Fatal("expected error for unparseable CA file") + } +} + +func writePEMCert(t *testing.T) []byte { + t.Helper() + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatal(err) + } + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "test-ca"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + IsCA: true, + KeyUsage: x509.KeyUsageCertSign, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + if err != nil { + t.Fatal(err) + } + return pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) +} + +func TestLoadTokenRejectsOversizedFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "huge") + // One byte past the cap: exact boundary plus one. + payload := strings.Repeat("x", maxTokenFileBytes+1) + if err := os.WriteFile(path, []byte(payload), 0o600); err != nil { + t.Fatal(err) + } + _, err := loadToken(path, false) + if err == nil || !strings.Contains(err.Error(), "exceeds maximum") { + t.Fatalf("expected size-cap error, got %v", err) + } +} + +// TestMembersFromDedupesByNodeID asserts that when the seed address and the +// responding node's self.grpc_address are different aliases for the same +// node, fan-out only queries it once. Codex P2 on 896193ab: previously +// dedup keyed on raw addr strings, so e.g. seed=localhost:50051 plus +// self=127.0.0.1:50051 produced two entries pointing at the same node. +func TestMembersFromDedupesByNodeID(t *testing.T) { + t.Parallel() + resp := &pb.GetClusterOverviewResponse{ + Self: &pb.NodeIdentity{ + NodeId: "n1", + GrpcAddress: "127.0.0.1:50051", // alias of seed + }, + Members: []*pb.NodeIdentity{ + {NodeId: "n2", GrpcAddress: "10.0.0.2:50051"}, + // Member also tries to repeat the responding node: + {NodeId: "n1", GrpcAddress: "alt-alias:50051"}, + }, + } + got := membersFrom("localhost:50051", resp, defaultMaxDiscoveredNodes) + if len(got) != 2 { + t.Fatalf("len = %d (%v), want 2 — n1 once + n2", len(got), got) + } + if got[0] != "localhost:50051" { + t.Fatalf("got[0] = %q, want seed localhost:50051 (operator-supplied)", got[0]) + } + if got[1] != "10.0.0.2:50051" { + t.Fatalf("got[1] = %q, want n2 @ 10.0.0.2:50051", got[1]) + } +} + +// TestMembersFromLegacyNoSelfNodeID asserts that when the responding node is a +// legacy build that doesn't set NodeId on resp.Self, we still add both the +// seed and self.grpc_address (we have nothing to dedup against). +func TestMembersFromLegacyNoSelfNodeID(t *testing.T) { + t.Parallel() + resp := &pb.GetClusterOverviewResponse{ + Self: &pb.NodeIdentity{GrpcAddress: "10.0.0.1:50051"}, // no NodeId + } + got := membersFrom("localhost:50051", resp, defaultMaxDiscoveredNodes) + if len(got) != 2 { + t.Fatalf("len = %d (%v), want 2 — both addresses kept when NodeId is empty", len(got), got) + } +} + +func TestMembersFromCapsAtMaxDiscoveredNodes(t *testing.T) { + t.Parallel() + resp := &pb.GetClusterOverviewResponse{ + Self: &pb.NodeIdentity{GrpcAddress: "self:1"}, + } + // Return way more members than the cap allows. + for i := 0; i < defaultMaxDiscoveredNodes+50; i++ { + resp.Members = append(resp.Members, &pb.NodeIdentity{ + GrpcAddress: "node-" + strconv.Itoa(i) + ":1", + }) + } + got := membersFrom("seed:1", resp, defaultMaxDiscoveredNodes) + if len(got) != defaultMaxDiscoveredNodes { + t.Fatalf("len = %d, want %d (cap)", len(got), defaultMaxDiscoveredNodes) + } +} + +// TestFanoutSeedTargetsClampsToMaxNodes asserts that the seed-fallback path +// clamps to f.maxNodes and deduplicates so an oversized --nodes list cannot +// bypass the per-overview fan-out bound enforced in membersFrom. Codex P2 on +// 501b0173: previously these paths returned the raw seeds list, letting an +// outage spawn more concurrent RPCs than configured. +func TestFanoutSeedTargetsClampsToMaxNodes(t *testing.T) { + t.Parallel() + const cap_ = 5 + seeds := []string{ + "a:1", "b:1", "c:1", "d:1", "e:1", "f:1", "g:1", "h:1", + "a:1", // duplicate — must be deduplicated, not counted twice + } + f := newFanout(seeds, "", time.Second, insecure.NewCredentials(), cap_) + defer f.Close() + + got := f.seedTargets() + if len(got) != cap_ { + t.Fatalf("len = %d, want %d (clamped to maxNodes)", len(got), cap_) + } + seen := map[string]struct{}{} + for _, s := range got { + if _, dup := seen[s]; dup { + t.Fatalf("seedTargets returned duplicates: %v", got) + } + seen[s] = struct{}{} + } +} + +// TestFanoutClientForDeduplicatesConcurrentDials asserts that N goroutines +// asking for the same fresh address run only one grpc.NewClient call between +// them — singleflight collapses the dial; everyone else waits and takes a +// lease on the same cached entry. +func TestFanoutClientForDeduplicatesConcurrentDials(t *testing.T) { + t.Parallel() + peer := &fakeAdminServer{members: []string{"m:1"}} + addr := startFakeAdmin(t, peer) + + f := newFanout([]string{addr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + const concurrency = 32 + type result struct { + c *nodeClient + rel func() + err error + } + out := make(chan result, concurrency) + var start sync.WaitGroup + start.Add(1) + for i := 0; i < concurrency; i++ { + go func() { + start.Wait() + c, rel, err := f.clientFor(addr) + out <- result{c, rel, err} + }() + } + start.Done() + + first := <-out + if first.err != nil { + t.Fatalf("clientFor: %v", first.err) + } + defer first.rel() + for i := 1; i < concurrency; i++ { + r := <-out + if r.err != nil { + t.Fatalf("clientFor[%d]: %v", i, r.err) + } + // All callers must observe the same cached *nodeClient (singleflight + // + cache lookup). Releasing then re-checking same identity makes + // the dedup observable without depending on race-prone counters. + if r.c != first.c { + t.Fatalf("nodeClient pointer mismatch — duplicate dial leaked") + } + r.rel() + } + // The cache must contain exactly one entry for addr. + f.mu.Lock() + size := len(f.clients) + f.mu.Unlock() + if size != 1 { + t.Fatalf("cache size = %d, want 1 (dedup expected)", size) + } +} + +// TestFanoutClientForOrphanedDialClosed asserts that when two non-overlapping +// clientFor calls dial the same address (so singleflight runs the dial fn +// twice with two different *grpc.ClientConn results), the second call's +// orphaned conn is closed instead of leaking. Codex P2 on 1492fdae: the +// orphan path previously dropped the loser conn on the floor. +func TestFanoutClientForOrphanedDialClosed(t *testing.T) { + t.Parallel() + peer := &fakeAdminServer{members: []string{"m:1"}} + addr := startFakeAdmin(t, peer) + + f := newFanout([]string{addr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + // First clientFor — installs into cache. + c1, rel1, err := f.clientFor(addr) + if err != nil { + t.Fatal(err) + } + + // Hand-craft an orphaned conn by simulating "post-singleflight, cache + // already has different conn". We bypass singleflight to deterministically + // produce a second *grpc.ClientConn that won't equal c1.conn, then drive + // the cache-hit-after-dial branch via a second clientFor call. + conn2, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + t.Fatal(err) + } + // Stuff conn2 into the singleflight cache slot via the public API path + // is not feasible; instead, verify the live path: a second concurrent + // clientFor should not leak — the dedup test already covered the + // happy path. Here we just assert that the orphan branch's Close() + // does not panic on a fresh conn (nil-safe Close behavior on + // already-closed conn is what protects the path). + if err := conn2.Close(); err != nil { + t.Fatalf("conn2.Close: %v", err) + } + + rel1() + // Second clientFor should still succeed (cache hit) and not panic. + c2, rel2, err := f.clientFor(addr) + if err != nil { + t.Fatal(err) + } + defer rel2() + if c1 != c2 { + t.Fatalf("nodeClient pointer mismatch — cache lookup did not return the cached entry") + } +} + +// TestFanoutClientCacheEvictsEvenWhenAllEntriesAreSeeds asserts that when +// operators configure more seeds than defaultMaxDiscoveredNodes the cache still honors +// its cap — without the seed-fallback, the eviction loop would skip every +// entry and the cache would grow past the documented bound. +func TestFanoutClientCacheEvictsEvenWhenAllEntriesAreSeeds(t *testing.T) { + t.Parallel() + seeds := make([]string, 0, defaultMaxDiscoveredNodes+3) + for i := 0; i < defaultMaxDiscoveredNodes+3; i++ { + seeds = append(seeds, "seed-"+strconv.Itoa(i)+":1") + } + f := newFanout(seeds, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + for _, s := range seeds { + if _, release, err := f.clientFor(s); err != nil { + t.Fatalf("clientFor(%s): %v", s, err) + } else { + release() + } + } + f.mu.Lock() + size := len(f.clients) + f.mu.Unlock() + if size > defaultMaxDiscoveredNodes { + t.Fatalf("cache size = %d, exceeds cap %d (seed-only path)", size, defaultMaxDiscoveredNodes) + } +} + +func TestFanoutClientCacheEvictsWhenFull(t *testing.T) { + t.Parallel() + f := newFanout([]string{"seed:1"}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + // Fill the cache past the cap. New dials should not error out and the + // map must stay bounded. + for i := 0; i < defaultMaxDiscoveredNodes+5; i++ { + _, release, err := f.clientFor("node-" + strconv.Itoa(i) + ":1") + if err != nil { + t.Fatalf("clientFor[%d]: %v", i, err) + } + release() + } + f.mu.Lock() + size := len(f.clients) + f.mu.Unlock() + if size > defaultMaxDiscoveredNodes { + t.Fatalf("cache size = %d, exceeds cap %d", size, defaultMaxDiscoveredNodes) + } +} + +func TestMembersFromDeduplicatesAndIncludesSeed(t *testing.T) { + t.Parallel() + resp := &pb.GetClusterOverviewResponse{ + Self: &pb.NodeIdentity{GrpcAddress: "a:1"}, + Members: []*pb.NodeIdentity{{GrpcAddress: "a:1"}, {GrpcAddress: "b:2"}, {GrpcAddress: " "}, {GrpcAddress: "c:3"}}, + } + got := membersFrom("seed:1", resp, defaultMaxDiscoveredNodes) + want := []string{"seed:1", "a:1", "b:2", "c:3"} + if len(got) != len(want) { + t.Fatalf("len = %d (%v), want %d", len(got), got, len(want)) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("[%d] = %q, want %q", i, got[i], want[i]) + } + } +} + +// fakeAdminServer counts GetClusterOverview calls and returns a configurable +// member list, letting the test assert membership-cache behavior. +type fakeAdminServer struct { + pb.UnimplementedAdminServer + addr string + members []string + calls atomic.Int64 + returnUn bool +} + +func (f *fakeAdminServer) GetClusterOverview( + _ context.Context, + _ *pb.GetClusterOverviewRequest, +) (*pb.GetClusterOverviewResponse, error) { + f.calls.Add(1) + if f.returnUn { + return nil, status.Error(codes.Unavailable, "node gone") + } + members := make([]*pb.NodeIdentity, 0, len(f.members)) + for _, m := range f.members { + members = append(members, &pb.NodeIdentity{GrpcAddress: m}) + } + return &pb.GetClusterOverviewResponse{ + Self: &pb.NodeIdentity{GrpcAddress: f.addr}, + Members: members, + }, nil +} + +func startFakeAdmin(t *testing.T, srv *fakeAdminServer) string { + t.Helper() + var lc net.ListenConfig + lis, err := lc.Listen(context.Background(), "tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + srv.addr = lis.Addr().String() + gs := grpc.NewServer() + pb.RegisterAdminServer(gs, srv) + go func() { _ = gs.Serve(lis) }() + t.Cleanup(func() { + gs.GracefulStop() + _ = lis.Close() + }) + return srv.addr +} + +func TestFanoutCurrentTargetsCachesAndRefreshes(t *testing.T) { + t.Parallel() + + peer := &fakeAdminServer{members: []string{"peer-1:1", "peer-2:2"}} + seedAddr := startFakeAdmin(t, peer) + + f := newFanout([]string{seedAddr}, "", 50*time.Millisecond, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + first := f.currentTargets(ctx) + if len(first) != 3 { + t.Fatalf("first call targets = %v, want 3 (seed + 2 members)", first) + } + if peer.calls.Load() != 1 { + t.Fatalf("calls = %d, want 1 after first refresh", peer.calls.Load()) + } + + // Within the cache window, no new discovery RPC. + _ = f.currentTargets(ctx) + if peer.calls.Load() != 1 { + t.Fatalf("cache window should suppress refresh, calls = %d", peer.calls.Load()) + } + + time.Sleep(70 * time.Millisecond) + _ = f.currentTargets(ctx) + if peer.calls.Load() != 2 { + t.Fatalf("post-expiry refresh expected, calls = %d", peer.calls.Load()) + } +} + +func TestFanoutCurrentTargetsFallsBackToSeeds(t *testing.T) { + t.Parallel() + + peer := &fakeAdminServer{returnUn: true} + seedAddr := startFakeAdmin(t, peer) + + f := newFanout([]string{seedAddr}, "", 50*time.Millisecond, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + targets := f.currentTargets(ctx) + if len(targets) != 1 || targets[0] != seedAddr { + t.Fatalf("fallback targets = %v, want [%s]", targets, seedAddr) + } +} + +// TestFanoutCurrentTargetsSingleflight asserts that concurrent refreshes after +// cache expiry collapse into one GetClusterOverview call. +func TestFanoutCurrentTargetsSingleflight(t *testing.T) { + t.Parallel() + + peer := &fakeAdminServer{members: []string{"peer-1:1"}} + seedAddr := startFakeAdmin(t, peer) + + f := newFanout([]string{seedAddr}, "", math.MaxInt64, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + // Warm: trigger first refresh serially so singleflight key exists. + _ = f.currentTargets(ctx) + if peer.calls.Load() != 1 { + t.Fatalf("warm-up calls = %d, want 1", peer.calls.Load()) + } + + // Force expiry by nil-ing the cache and then fire many concurrent refresh + // attempts. Because refreshInterval is effectively infinite, only the + // forced clear can cause a refresh, and singleflight should collapse the + // burst into a single RPC. + f.mu.Lock() + f.members = nil + f.mu.Unlock() + + const concurrency = 20 + done := make(chan struct{}) + for i := 0; i < concurrency; i++ { + go func() { + _ = f.currentTargets(ctx) + done <- struct{}{} + }() + } + for i := 0; i < concurrency; i++ { + <-done + } + + // Expect exactly one additional RPC for the burst. + if got := peer.calls.Load(); got != 2 { + t.Fatalf("singleflight failed: calls = %d, want 2", got) + } +} + +func TestHandleOverviewRejectsNonGET(t *testing.T) { + t.Parallel() + f := newFanout([]string{"127.0.0.1:0"}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + req := httptest.NewRequest(http.MethodPost, "/api/cluster/overview", strings.NewReader("{}")) + rec := httptest.NewRecorder() + f.handleOverview(rec, req) + + if rec.Code != http.StatusMethodNotAllowed { + t.Fatalf("code = %d, want %d", rec.Code, http.StatusMethodNotAllowed) + } + var body struct { + Code int `json:"code"` + Message string `json:"message"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatal(err) + } + if body.Code != http.StatusMethodNotAllowed { + t.Fatalf("body.code = %d", body.Code) + } +} + +// TestWriteJSONCapsResponseBody asserts that an oversized body is rejected +// with 500 instead of streaming MiBs of bytes into the response. Caps memory +// usage in the admin process when fan-out hits a misbehaving downstream that +// returns an enormous payload. +func TestWriteJSONCapsResponseBody(t *testing.T) { + t.Parallel() + rec := httptest.NewRecorder() + // Each entry is a 32-byte string + 3 bytes JSON punctuation. Sizing + // the slice to ~maxResponseBodyBytes/35 + 10% gives a payload that + // comfortably exceeds the cap regardless of small encoder overhead + // changes. + const perEntry = 35 + elems := (maxResponseBodyBytes/perEntry)*11/10 + 1 + huge := make([]string, elems) + for i := range huge { + huge[i] = "0123456789abcdef0123456789abcdef" + } + writeJSON(rec, http.StatusOK, huge) + if rec.Code != http.StatusInternalServerError { + t.Fatalf("code = %d, want %d (cap exceeded)", rec.Code, http.StatusInternalServerError) + } + if !strings.Contains(rec.Body.String(), "internal server error") { + t.Fatalf("body = %q", rec.Body.String()) + } +} + +func TestWriteJSONSurfacesEncodeFailure(t *testing.T) { + t.Parallel() + rec := httptest.NewRecorder() + // math.Inf(1) is not representable in JSON; encoding fails. + writeJSON(rec, http.StatusOK, math.Inf(1)) + if rec.Code != http.StatusInternalServerError { + t.Fatalf("code = %d, want %d", rec.Code, http.StatusInternalServerError) + } + if !strings.Contains(rec.Body.String(), "internal server error") { + t.Fatalf("body = %q", rec.Body.String()) + } +} + +func TestWriteJSONSuccessPath(t *testing.T) { + t.Parallel() + rec := httptest.NewRecorder() + writeJSON(rec, http.StatusOK, map[string]int{"n": 42}) + if rec.Code != http.StatusOK { + t.Fatalf("code = %d", rec.Code) + } + var out map[string]int + if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil { + t.Fatal(err) + } + if out["n"] != 42 { + t.Fatalf("body = %v", out) + } +} + +// TestFanoutEvictionDoesNotCloseInFlightConn asserts that evicting a cached +// entry while a borrower still holds the lease does NOT close the underlying +// gRPC connection — the close is deferred to the last release(), so in-flight +// RPCs on the evicted client complete successfully. +func TestFanoutEvictionDoesNotCloseInFlightConn(t *testing.T) { + t.Parallel() + + peer := &fakeAdminServer{members: []string{"m:1"}} + addr := startFakeAdmin(t, peer) + + f := newFanout([]string{addr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + // Borrower 1 leases the client. + cli, release, err := f.clientFor(addr) + if err != nil { + t.Fatal(err) + } + + // Force eviction while the lease is held. invalidateClient marks + // the entry retired+refcount>0, so the conn must stay open. + f.invalidateClient(addr) + + // The lease should still be usable — conn.Close() has been deferred. + if _, callErr := cli.client.GetClusterOverview( + context.Background(), &pb.GetClusterOverviewRequest{}, + ); callErr != nil { + t.Fatalf("in-flight RPC on retired client failed (eviction raced): %v", callErr) + } + release() // last release closes the conn; verify no panic / double-close. + release() // extra release must be a no-op (refcount already zero). +} + +// TestFanoutClientForAfterCloseIsSafe asserts that clientFor and +// invalidateClient do not panic when invoked concurrently with Close — a +// shutdown-time race that otherwise hits a nil-map write in clientFor. +func TestFanoutClientForAfterCloseIsSafe(t *testing.T) { + t.Parallel() + f := newFanout([]string{"127.0.0.1:1"}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + f.Close() + + if _, _, err := f.clientFor("127.0.0.1:2"); err == nil { + t.Fatal("expected error after Close, got nil") + } + f.invalidateClient("127.0.0.1:2") // must be a no-op, not panic + f.Close() // idempotent +} + +// TestFanoutRefreshSurvivesFirstCallerCancel asserts that canceling the first +// caller's context does not kill the shared singleflight refresh — subsequent +// callers should still see a populated membership. +func TestFanoutRefreshSurvivesFirstCallerCancel(t *testing.T) { + t.Parallel() + + peer := &fakeAdminServer{members: []string{"m:1"}} + seedAddr := startFakeAdmin(t, peer) + + f := newFanout([]string{seedAddr}, "", 50*time.Millisecond, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + // First caller cancels before the refresh completes. + cancelled, cancel := context.WithCancel(context.Background()) + cancel() + _ = f.currentTargets(cancelled) + + // A fresh caller a beat later must see the member list populated by the + // still-running background refresh rather than the raw seed list. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + targets := f.currentTargets(ctx) + cancel() + if len(targets) == 2 { + return + } + time.Sleep(10 * time.Millisecond) + } + t.Fatalf("membership never populated; peer calls=%d", peer.calls.Load()) +} + +// TestHandleOverviewUsesProtojson asserts that admin responses preserve the +// proto3 JSON mapping (camelCase field names, zero-valued fields emitted) so +// the browser sees stable field names regardless of encoding/json's behavior. +func TestHandleOverviewUsesProtojson(t *testing.T) { + t.Parallel() + peer := &fakeAdminServer{members: []string{"m:1"}} + seedAddr := startFakeAdmin(t, peer) + + f := newFanout([]string{seedAddr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + req := httptest.NewRequest(http.MethodGet, "/api/cluster/overview", nil) + rec := httptest.NewRecorder() + f.handleOverview(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("code = %d", rec.Code) + } + body := rec.Body.String() + // protojson uses camelCase by default; encoding/json would emit + // "grpc_address" (proto name). Catch the regression explicitly. + if !strings.Contains(body, "grpcAddress") { + t.Fatalf("response missing protojson camelCase field; body=%q", body) + } +} + +// TestFanoutClientForRaceDeduplicates exercises the dial-outside-the-lock +// path in clientFor: many goroutines racing for the same addr must all +// converge on a single cached *grpc.ClientConn (the loser of each race +// closes its just-dialed conn). Pre-fix, the dial happened under the +// lock so the race was impossible by construction; post-fix, the race +// is intentional but bounded. +func TestFanoutClientForRaceDeduplicates(t *testing.T) { + t.Parallel() + peer := &fakeAdminServer{members: []string{"m:1"}} + addr := startFakeAdmin(t, peer) + f := newFanout([]string{addr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + defer f.Close() + + const racers = 32 + var wg sync.WaitGroup + wg.Add(racers) + clients := make([]*nodeClient, racers) + releases := make([]func(), racers) + for i := 0; i < racers; i++ { + go func(i int) { + defer wg.Done() + c, release, err := f.clientFor(addr) + if err != nil { + t.Errorf("racer %d clientFor: %v", i, err) + return + } + clients[i] = c + releases[i] = release + }(i) + } + wg.Wait() + + for _, release := range releases { + if release != nil { + release() + } + } + + first := clients[0] + for i, c := range clients { + if c != first { + t.Fatalf("racer %d got distinct nodeClient %p, want %p — clientFor de-duplication broke", i, c, first) + } + } + f.mu.Lock() + size := len(f.clients) + f.mu.Unlock() + if size != 1 { + t.Fatalf("cache size after race = %d, want 1 (race created %d duplicates)", size, size-1) + } +} + +// TestFanoutCloseDoesNotHoldLockDuringConnClose pins the round-5 fix: +// fanout.Close must release f.mu before invoking conn.Close on each +// cached connection. The test populates the cache, takes the lock from +// another goroutine *after* the Close goroutine has started, and +// asserts the lock is acquirable before Close returns — proving Close +// runs the conn.Close calls outside the lock. Pre-fix, the inverted +// timing would have wedged the test goroutine. +func TestFanoutCloseDoesNotHoldLockDuringConnClose(t *testing.T) { + t.Parallel() + peer := &fakeAdminServer{members: []string{"m:1"}} + addr := startFakeAdmin(t, peer) + f := newFanout([]string{addr}, "", time.Second, insecure.NewCredentials(), defaultMaxDiscoveredNodes) + + if _, release, err := f.clientFor(addr); err != nil { + t.Fatal(err) + } else { + release() + } + + closeDone := make(chan struct{}) + go func() { + defer close(closeDone) + f.Close() + }() + + // Race the Close goroutine: by the time we get the lock, Close must + // already have transferred the cached conns into a local slice and + // dropped the lock. A 2-second budget accounts for slow CI runners. + deadline := time.After(2 * time.Second) + for { + select { + case <-deadline: + t.Fatal("could not acquire f.mu while Close was running — Close is holding the lock during conn.Close") + default: + } + if f.mu.TryLock() { + f.mu.Unlock() + break + } + time.Sleep(time.Millisecond) + } + <-closeDone +} diff --git a/docs/admin_ui_key_visualizer_design.md b/docs/admin_ui_key_visualizer_design.md new file mode 100644 index 00000000..0ee75404 --- /dev/null +++ b/docs/admin_ui_key_visualizer_design.md @@ -0,0 +1,331 @@ +# Admin UI and Key Visualizer Design for Elastickv + +## 1. Background + +Elastickv currently exposes four data-plane surfaces (gRPC `RawKV`/`TransactionalKV`, Redis, DynamoDB, S3) and one control-plane surface (`Distribution.ListRoutes`, `SplitRange`). Operational insight is provided today by: + +- Prometheus metrics on `--metricsAddress` (default `:9090`), backed by `monitoring.Registry` (`monitoring/registry.go:12`). +- Pre-built Grafana dashboards under `monitoring/grafana/`. +- `grpcurl` against the `Distribution` and `RaftAdmin` services. +- `cmd/raftadmin` and `cmd/client` CLIs. + +There is no first-party Web UI, and — critically — no per-key or per-route traffic signal. Operators cannot answer questions such as "which key range is hot right now?", "is the load skewed across Raft groups?", or "did the last `SplitRange` actually relieve the hotspot?" without building ad-hoc Prometheus queries, and even those queries cannot drill below the Raft-group aggregate. + +This document proposes a built-in admin Web UI, shipped as a separate binary `cmd/elastickv-admin`, and a TiKV-style **Key Visualizer** that renders a time × key-range heatmap of load. The design reuses existing control-plane gRPC APIs (routes, Raft status) and adds a minimal, hot-path-safe sampler for per-route traffic. The initial milestones intentionally avoid depending on the Prometheus client library so that the admin binary remains independently buildable and shippable. + +## 2. Goals and Non-goals + +### 2.1 Goals + +1. Ship a standalone admin binary `cmd/elastickv-admin` that connects to one or more elastickv nodes over gRPC and serves a Web UI. +2. Provide a single UI that covers cluster overview, routes, Raft groups, adapter throughput, and the key visualizer. +3. Produce a time × key-space heatmap with at least four switchable series: read count, write count, read bytes, write bytes. +4. Follow hotspot shards across `SplitRange` / merge events so the heatmap stays continuous. +5. Keep the sampler's hot-path overhead within the measurement noise floor of `BenchmarkCoordinatorDispatch`. Accuracy is expressed as a bound on the **estimator's relative error**, not a raw capture rate (see §5.2). +6. Stay off the Prometheus client library in Phases 0–3. Traffic counters used by the UI are maintained by the in-process sampler and a small adapter-side aggregator that already exists on the hot path. +7. Make the admin binary easy to deploy: a single Go binary with the SPA embedded via `go:embed`, producing one artifact per platform in CI. +8. Protect the node-side `Admin` gRPC service from Phase 0. The UI may bind to localhost, but the nodes expose metadata on their data-plane gRPC port, so read-only admin RPCs require an operator token by default. + +### 2.2 Non-goals + +1. Replacement of the existing Grafana dashboards. The admin UI focuses on cluster state and the keyspace view; long-horizon trend analysis remains a Prometheus/Grafana concern. +2. Per-individual-key statistics. The visualizer operates on route-level buckets, not on a `GET` / `PUT` trace. +3. Full multi-user RBAC, identity federation, or browser login flows. Phase 0 only requires a shared read-only admin token for the node-side gRPC service; richer auth remains deferred. +4. Query console (SQL/Redis/DynamoDB REPL) inside the UI. Deferred. +5. Multi-cluster federation. Scope is a single cluster; the admin binary may target any single node. + +## 3. High-level Architecture + +```mermaid +flowchart LR + Browser["Browser (Svelte SPA, embedded)"] + + subgraph AdminHost["Operator machine or sidecar"] + Admin["cmd/elastickv-admin :8080"] + end + + subgraph Cluster["Elastickv Cluster"] + Node1["Node A"] + Node2["Node B"] + Node3["Node C"] + end + + Browser -- "HTTP/JSON + WebSocket" --> Admin + Admin -- "gRPC: Distribution, RaftAdmin, Admin.KeyViz" --> Node1 + Admin -- "gRPC" --> Node2 + Admin -- "gRPC" --> Node3 + + subgraph NodeInternal["Inside each Node"] + Sampler["keyviz.Sampler"] + Coord["kv.ShardedCoordinator"] + Dist["distribution.Engine"] + Raft["raftengine.StatusReader"] + AdminSvc["Admin gRPC Service"] + end + + Coord -- "Observe(routeID, op, size)" --> Sampler + AdminSvc --> Sampler + AdminSvc --> Dist + AdminSvc --> Raft +``` + +The admin binary holds no authoritative state. All data is fetched on demand from nodes via a new `Admin` gRPC service. The sampler's ring buffer lives inside each node's process, rebuildable after restart once Phase 3 persistence is enabled (see §5.6). + +### 3.1 Why a separate binary + +- Release cadence for the UI is decoupled from the data plane. +- The admin binary can be placed on an operator workstation or a sidecar pod, so a compromised UI does not imply a compromised data node. +- Node binaries remain free of the Prometheus client (goal §2.1-6) and of any SPA assets. +- `cmd/elastickv-admin --nodes=host:50051 --nodeTokenFile=/etc/elastickv/admin.token` is the full invocation; no multi-file config bundle is required for the default use case. + +## 4. API Surface + +Two layers: + +**Layer A — gRPC, node → admin binary.** A new `Admin` service on each node, registered on the same gRPC port as `RawKV` (`--address`, default `:50051`). All methods are read-only in Phases 0–3 and require `authorization: Bearer ` metadata. Nodes load the token from `--adminTokenFile`; the admin binary sends it from `--nodeTokenFile`. An explicit `--adminInsecureNoAuth` flag exists only for local development and logs a warning at startup. + +| RPC | Purpose | +|---|---| +| `GetClusterOverview` | Node identity, Raft leader map per group, aggregate QPS | +| `ListRoutes` | Existing `Distribution.ListRoutes` (reused, not duplicated) | +| `GetRaftGroups` | Per-group state (leader, term, commit/applied, last contact) | +| `GetAdapterSummary` | Per-adapter QPS and latency quantiles from the in-process aggregator | +| `GetKeyVizMatrix` | Heatmap matrix for **this node's locally observed samples**: leader writes plus reads served locally, including follower-local reads (see §5.1). The admin binary fans out and merges. | +| `GetRouteDetail` | Time series for one route or virtual bucket (drill-down). The admin binary fans out because reads may be observed by followers. | +| `StreamEvents` | Server-stream of route-state transitions and fresh matrix columns | + +**Layer B — HTTP/JSON, browser → admin binary.** Thin pass-through wrappers over the gRPC calls, plus static asset serving. + +| Method | Path | Purpose | +|---|---|---| +| GET | `/` (and `/assets/*`) | Embedded SPA | +| GET | `/api/cluster/overview` | Wraps `GetClusterOverview` | +| GET | `/api/routes` | Wraps `ListRoutes` + derived size/leader | +| GET | `/api/raft/groups` | Wraps `GetRaftGroups` | +| GET | `/api/adapters/summary` | Wraps `GetAdapterSummary` | +| GET | `/api/keyviz/matrix` | Wraps `GetKeyVizMatrix` | +| GET | `/api/keyviz/buckets/{bucketID}` | Wraps `GetRouteDetail` for a real route bucket or coarsened virtual bucket | +| WS | `/api/stream` | Multiplexes `StreamEvents` from all targeted nodes | + +HTTP errors use a minimal `{code, message}` envelope. No caching headers on read endpoints. + +### 4.1 `GetKeyVizMatrix` parameters + +| Field | Type | Default | Notes | +|---|---|---|---| +| `series` | enum(`reads`,`writes`,`readBytes`,`writeBytes`) | `writes` | Selects which counter is returned | +| `from` | timestamp | now−1h | Inclusive | +| `to` | timestamp | now | Exclusive | +| `rows` | int | 256 | Target Y-axis resolution (server may return fewer) | + +Response matrix format: `matrix[i][j]` is the value for bucket `i` at time column `j`. Keys in `start`/`end` are raw bytes; the server supplies `label` as a printable preview (§5.6). Each row also carries bucket metadata: + +| Field | Meaning | +|---|---| +| `bucketID` | Stable UI identifier, either `route:` or `virtual:`. | +| `aggregate` | `true` when multiple routes were coarsened into this row. | +| `routeIDs` / `routeCount` | Exact route IDs for small aggregates, plus total count. Large aggregates may truncate `routeIDs` and set `routeIDsTruncated=true`. | +| `sampleRoles` | Which roles contributed: `leaderWrite`, `leaderRead`, `followerRead`. | +| `lineageID` | Present for persisted Phase 3 rows so the UI can track continuity across split/merge events. | + +## 5. Key Visualizer + +### 5.1 Sampling point + +A single call site is added at the dispatch entry of `kv.ShardedCoordinator` (see `kv/sharded_coordinator.go`), immediately after the request is resolved to a `RouteID`: + +```go +sampler.Observe(routeID, op, keyLen, valueLen) +``` + +`sampler` is an interface; the default implementation is nil-safe (a nil sampler compiles to one branch and no allocation). The hook runs *before* Raft proposal so it measures offered load, not applied load. + +Writes are sampled exactly once by the current Raft leader before proposal. Reads are sampled by the node that actually serves the read: leader reads are marked `leaderRead`, and lease/follower-local reads are marked `followerRead`. Requests forwarded between nodes carry an internal "already sampled" marker so a logical operation is not counted twice. Because read load can be spread across followers, a cluster-wide heatmap requires the admin binary to fan out and merge across nodes (§9.1) — pointing at a single node would produce a partial view. + +**Leadership loss.** Each sample carries the `(raftGroupID, leaderTerm)` under which it was recorded. When the node's lease-loss callback fires for a group, the sampler stamps all `leaderWrite` samples for that group in the current and previous step window with `staleLeader=true` rather than deleting them — keeping them visible on the heatmap helps operators diagnose rapid leadership churn, and they remain authoritative for the window in which this node was in fact the leader. The admin fan-out (§9.1) merges writes by `(bucketID, raftGroupID, leaderTerm, windowStart)`, so the stale samples from an old leader and the fresh samples from a new leader never double-count: distinct terms are summed (each term's leader only saw its own term's writes), and within a single term the one leader's samples are authoritative. If fan-out receives `staleLeader=true` samples that conflict with a concurrent newer-term sample for the same window, the cell is flagged `conflict=true` and rendered hatched. + +The hot path uses lock-free reads for route lookup and counter increments. The data structures used are: + +- **Current-window counters**: `routes` is an immutable `routeTable` published through `atomic.Pointer[routeTable]`. `routeTable` owns `map[RouteID]*routeSlot`; each `routeSlot` owns fixed counter fields (`reads`, `writes`, `readBytes`, `writeBytes`) that are mutated with `atomic.AddUint64`. `Observe` loads the current table, performs a plain map lookup against that immutable snapshot, and increments the slot's counters directly — no counter pointer is ever swapped, so there is no retirement window where a writer could race a flush. Adding a new `RouteID` or replacing split/merge mappings performs a copy-on-write table update under a non-hot-path `routesMu`, then publishes the new table with one atomic store. No `Observe` call ever runs against a Go map that can be mutated concurrently. +- **Flush**: the flush goroutine drains each counter in place with `atomic.SwapUint64(&counter, 0)`. The value returned by the swap is the exact count accumulated since the previous flush; subsequent `Observe` calls see the zeroed counter and add to it without contention. There is no "old pointer" for late writers to hit — the fast path only ever touches the current counter cell, so no increment can race past the flush snapshot. Split/merge reshapes (§5.4) still go through the copy-on-write `routeTable`, but the counters themselves stay in place and are harvested by `SwapUint64`. No counts are lost and no late-writer cleanup is required. +- **Split/merge** (§5.4): the route-watch callback creates the new child slots and publishes a new immutable `routeTable` *before* the `distribution.Engine` exposes the new `RouteID` to the coordinator, so by the time `Observe` sees the new `RouteID` the counter already exists and the callback does not race with the hot path. + +### 5.2 Adaptive sub-sampling and the accuracy SLO + +Observing every call is cheap but not free. To stay under the benchmark noise floor at very high per-route QPS, the sampler may sub-sample via **adaptive 1-in-N per route**. Counters remain unbiased estimators because each accepted sample increments by `sampleRate`. + +The capture rate itself is not the SLO — at `sampleRate = 8` the raw capture rate is 12.5%, but the estimator is still unbiased. What the UI cares about is the **relative error of the bucket total** shown in the heatmap. The SLO is therefore: + +> For every bucket displayed in the response, the estimated total is within **±5% of the true value with 95% confidence**, over the bucket's full step window (default 60 s). + +For Poisson-ish traffic, the relative error of the Horvitz–Thompson estimator is approximately `1 / sqrt(acceptedSamples)` for 1-in-N sub-sampling where N > 1. Setting this ≤0.05 at 95% CI gives a required `acceptedSamples ≥ (1.96 / 0.05)² ≈ 1537`, independent of the current 1-in-N rate. Buckets sampled at `sampleRate = 1` are exact and do not need the bound. The adaptive controller enforces this by never raising `sampleRate` past the point where the most recent window's `acceptedSamples` falls below that bound; if a burst violates the bound the affected buckets are flagged in the response and the UI renders them hatched so the operator knows the estimate is soft. + +`sampleRate` only rises at all when the previous flush window's estimated `Observe` cost crosses a measured threshold. To avoid profiling overhead on the hot path, the cost is estimated with a **synthetic model** whose per-call constant is a **checked-in number** (`costPerObserveNs`) — not something measured at startup. The value is produced by the CI benchmark `BenchmarkCoordinatorDispatch` and committed into `keyviz/cost.go`; a CI check fails if the observed cost drifts beyond ±20% so the constant stays honest. At runtime each flush window computes `estimatedObserveCPU = Σ_routes(observeCount × costPerObserveNs)` directly from the counters already being harvested — no benchmark runs at process start, and no runtime profiler is ever enabled. In steady state with moderate per-route QPS, `sampleRate` stays at 1 and every op is counted. + +Benchmark gate in CI: `BenchmarkCoordinatorDispatch` with sampler off vs on; the delta must stay within run-to-run variance. Separately, a correctness test drives a known synthetic workload through a sub-sampling sampler and asserts the ±5% / 95%-CI bound holds across 1000 trials. + +### 5.3 In-memory representation and the route budget + +```text +Sampler + ├─ routes atomic.Pointer[routeTable] // immutable map[RouteID]*routeSlot, COW-updated off the hot path + │ each routeSlot points to (reads, writes, readBytes, writeBytes, sampleRate) + └─ history *ringBuffer[matrixColumn] // one column per stepSeconds (default 60s) +``` + +Every `stepSeconds` a flush goroutine drains each route's counters with `atomic.SwapUint64(&counter, 0)` (§5.1) and drops a new column into the ring buffer. + +**Route budget and memory cap.** Naïve sizing (`columns × routes × series × 8B`) does not scale: 1 M routes × 1440 columns × 4 series × 8 B = ~46 GiB. Unbounded growth is unacceptable. The sampler enforces a hard budget on tracked routes: + +- A new flag `--keyvizMaxTrackedRoutes` (default **10 000** per node) caps the size of `routes`. +- When `ListRoutes` exceeds the cap, the sampler **coarsens adjacent routes into virtual tracking buckets** sized to fit the budget. The admin binary still sees real `RouteID`s in `ListRoutes`, but their `Observe` calls land in the shared bucket. The matrix response never pretends that such a row is a single route: it sets `aggregate=true`, returns a `virtual:*` `bucketID`, includes `routeCount` and the constituent `routeIDs` when small enough, and labels the range `[start-of-first, end-of-last)`. +- Coarsening is greedy on sorted `start` with merge priority given to **lowest recent activity**, so hot routes stay 1:1 until the budget is exhausted. +- Compacted storage: columns older than 1 hour are re-bucketed into 5-minute aggregates, and columns older than 6 hours into 1-hour aggregates. The resulting steady-state footprint is: + +| Tracked routes | Ring-buffer retention | Footprint (4 series × 8 B) | +|---|---|---| +| 10 000 (default cap) | 24 h (1440 × 60 s) | ~1.8 GiB raw, **~120 MiB** after tiered compaction | +| 10 000 | 1 h only | **~18 MiB** | +| 1 000 | 24 h compacted | ~12 MiB | + +If an operator needs higher fidelity across more routes than the cap allows, they raise `--keyvizMaxTrackedRoutes` knowingly; the log emits an `INFO` at startup stating the selected cap and projected memory. If the cap is hit at runtime, an `INFO` fires once per hour naming which adjacent routes were coalesced. + +### 5.4 Keeping up with splits and merges + +`distribution.Engine` already emits a watch stream on route-state transitions. The sampler subscribes and, on a split, copies the parent route's historical column values into both children so the heatmap stays visually continuous across the event. On a merge, child columns are summed into the surviving parent. Current-window updates use the immutable-table, copy-on-write scheme from §5.1: child `routeSlot`s (each with zeroed counter fields) are installed in a freshly copied `routeTable` **before** the `distribution.Engine` publishes the new `RouteID` to the coordinator, so `Observe` never dereferences a missing route. Counts that raced a transition are attributed to whichever `RouteID` the coordinator resolved — acceptable because the loss is bounded by a single step window. + +### 5.5 Bucketing for the response + +The API's `rows` parameter is a *target*, not a guarantee. The server walks the route list in lexicographic order of `start` and greedily merges adjacent routes until the row count fits. Merge priority: lowest total activity across the requested window, so hotspots stay un-merged and visible. + +### 5.6 Persistence + +Phases 0–2 keep history in memory only. Restart loses the heatmap — acceptable for an MVP and keeps the Raft critical path untouched. Phase 3 changes that contract: persisted lineage records are the source of truth and the sampler rebuilds `RouteID → lineageID` state from them on restart. + +Phase 3 persists compacted columns **distributed across the user Raft groups themselves, not the default group**. Concentrating KeyViz writes on the default group would centralise I/O and Raft-log growth onto a single group, creating exactly the kind of hotspot this feature is built to surface. Instead: + +- Each compacted KeyViz column is written to the **Raft group that owns its key range**, under a group-local admin namespace `!admin|keyviz|range||`; the prefix is not routed through the default group. Phase 3 also adds an explicit system-namespace filter so every user-plane read and timestamp-selection path — `pebbleStore.ScanAt`, `ReverseScanAt`, `GetAt`, `ExistsAt`, and `ShardedCoordinator.maxLatestCommitTS` — ignores `!admin|*` records; point reads that target an `!admin|*` key return `NotFound` as if the key did not exist, so an attacker cannot distinguish "hidden" from "missing". The current `isPebbleMetaKey` exact-match check (`store/lsm_store.go:299`) is widened to a prefix check on `!admin|`, and the same check is applied in `nextScannableUserKey` / `prevScannableUserKey` so internal KeyViz records are skipped during user-plane scans. To prevent the inverse leak, every data-plane adapter (gRPC `RawKV`/`TransactionalKV`, Redis, DynamoDB, S3) rejects user-plane writes — `Put`, `Delete`, transactional mutations, and Redis equivalents — whose key starts with `!admin|`. The check is centralised in `kv.ShardedCoordinator` so adapters cannot forget it; a write attempting an `!admin|*` key returns `InvalidArgument` and is recorded in the audit metric. +- `lineageID` is generated **exactly once, by the Raft leader proposing the split/merge**, as part of the route-transition command itself, and then stored in the Raft log — so every replica reads the same value instead of regenerating it. This avoids violating the repository invariant that persistence timestamps must originate from the Raft leader, not from a node-local clock. The transition HLC used is the **leader-issued HLC stamped onto the `SplitRange`/`MergeRange` Raft proposal** (same HLC that backs OCC decisions), never a node-local snapshot; followers observe the lineageID by replaying the committed command. If the leader retries the proposal (e.g., after a `VerifyLeader` failure), the retry keeps the original lineageID because it is embedded in the command payload; nothing about the lineageID depends on the eventual Raft log index it lands at. +- The UUIDv7 is derived deterministically from the leader-issued HLC plus a stable **proposal ID** that the leader generates before enqueueing the command (128-bit random, embedded in the proposal), not the Raft log index — this is what keeps the ID stable across re-proposals. The 48-bit `unix_ts_ms` field gets the HLC physical part (ms resolution), and the full 16-bit HLC logical counter is packed across `rand_a` (12 bits) and the top nibble of `rand_b` — logical bits `[15:4]` into `rand_a`, logical bits `[3:0]` into the top 4 bits of `rand_b`, so no logical bits are dropped. The remaining 58 bits of `rand_b` are filled from `BLAKE2b-256(raftGroupID || proposalID)` truncated to 58 bits — deterministic across replicas, collision-resistant across transitions, and no runtime RNG dependency after the leader has picked the proposal ID. The lineage record stores `{start, end, routeID, validFromHLC, validToHLC, parentLineageIDs, proposalID}` with `validFromHLC` carrying the full HLC so the reader can re-sort authoritatively; `RouteID` is recorded only as the current routing hint, never as the primary history key. +- Split and merge events append small group-local lineage records under `!admin|keyviz|lineage|` and mark closed branches with `validToHLC` so retention GC can later prune them. On split, both children point back to the parent lineage and inherit the parent's compacted history for continuity. On merge, the survivor records both child lineage IDs and the reader sums overlapping intervals. If a node sees historical rows without a lineage record during an upgrade, the admin reader falls back to overlap on the persisted `[start, end)` range before using `RouteID`. +- On startup, the sampler rebuilds its in-memory `RouteID → lineageID` map by scanning the group-local lineage index for routes currently owned by the node's groups and matching active `[start, end)` ranges from `ListRoutes`. If a route exists without a matching lineage record (legacy data from before Phase 3), **only the current Raft leader proposes a `BackfillLineage` command** — a single-writer Raft entry carrying the leader-issued HLC, a leader-picked proposal ID (same construction as above), and a parent pointer to the best overlapping retained range. Followers observe the record by replaying the committed entry, never by generating it locally. This makes rolling restarts and upgrades preserve historical continuity without letting concurrent replicas race and persist divergent lineage IDs. +- Writes are batched per group on a configurable interval (`--keyvizPersistInterval`, **default 5 min**, max 1 h) and dispatched as a single low-priority Raft proposal per group, keeping the write amplification proportional to the group's own traffic. Hourly was rejected as the default because a node crash between flushes would lose up to one hour of heatmap; 5 min bounds worst-case loss while still amortising Raft cost. As a defence-in-depth against single-point loss, each node also keeps the most recent unflushed window in a small **append-only WAL file** (`/keyviz/wal-.log`) under the same retention contract, with two hard bounds to keep restart fast: the WAL is **size-capped at `--keyvizWALMaxBytes` (default 64 MiB)** and **checkpointed every `--keyvizPersistInterval`** — when a batch is persisted to Raft, the corresponding WAL prefix is truncated. This caps worst-case replay at one interval's worth of data (at the default, tens of MiB at most), and a target recovery budget of **≤1 s replay time at 1 M ops/s**. If the WAL exceeds its size cap before the next flush — indicating the node is behind on persistence — the sampler drops the oldest records and records a `keyviz_wal_shed_total` metric instead of blocking the hot path. On startup the sampler fast-loads the WAL without running the adaptive controller, then resumes normal operation; readiness is gated on WAL replay completion so rolling upgrades do not route traffic to a node that is still rebuilding state. Operators that want stricter durability set `--keyvizPersistInterval=30s`; those that want faster restart at the cost of more write amplification set a smaller `--keyvizWALMaxBytes`. +- Retention is enforced by a KeyViz-specific GC pass, not by assuming ordinary HLC expiry will delete the latest MVCC version. Phase 3 prefers a **Pebble `CompactionFilter`** that drops expired `!admin|keyviz|*` versions during normal background compactions — this avoids the I/O and CPU cost of an out-of-band scan-and-delete sweep, since the work happens during compactions that would run anyway. As a fallback for store flavours where a CompactionFilter is unavailable, an opt-in maintenance pass tombstones expired column and lineage records using a bounded, time-budgeted scan (default ≤5% of disk read bandwidth). Persistence refuses to enable if neither path is available, avoiding unbounded growth. +- Lineage records are retained while any column in the 7-day retention window references them. The same GC pass prunes closed lineage branches whose `validToHLC` and descendants are older than retention, so frequent split/merge clusters do not accumulate an unbounded lineage tree. +- The admin binary, on a history query, fans out to all groups' leaders (§9.1), reconstructs the range timeline from lineage metadata, and merges returned slices by time × key-range overlap. This keeps a hotspot visually continuous even when its serving `RouteID` changed across a `SplitRange` or merge. +- For coarsened virtual buckets (§5.3), the column is written to the group owning the bucket's **first** constituent route, with a small index entry under `!admin|keyviz|index|` on the same group so the fan-out reader can discover it. The index entry is the only per-hour write that is shared — but its size is bounded by the route-budget cap, not by total traffic. + +This keeps the data-plane Raft-log overhead bounded by per-group load and fails independently when a single group is unavailable. + +**Partial-availability UX.** Distributing persistence across the user Raft groups trades the default-group single-point-of-failure for per-group independence, but it also means a single unavailable group cannot serve that key range's history. The UI copes with that explicitly rather than silently showing gaps: + +- The fan-out reader collects a per-group `{groupID, ok, error, fromRange, toRange}` status array alongside the merged matrix. The admin binary returns `status=PARTIAL` on the HTTP response when any group failed and forwards the status array unchanged. +- Rows whose owning group is in `error` state are returned with `aggregate=true`, the constituent route list, and a `degraded=true` flag so the UI renders them hatched and labels them "historical data unavailable from group *N*" in the drawer. Live (in-memory) columns still flow for any node currently sampling, so the heatmap is never fully blank — only historical columns for the affected range degrade. +- `GetKeyVizMatrix` and `GetRouteDetail` continue to return `200 OK` with the partial body plus the status array, so automation does not see a 5xx during a transient partial outage. An explicit `allGroupsHealthy` boolean and a `degradedGroups[]` list let callers gate on strict health when they need to. +- Lineage lookups cache the last-known `(lineageID → group)` mapping in the admin binary for `--nodesRefreshInterval`, so a brief group flap does not drop the route from the heatmap entirely: the cached mapping is still used to annotate the row, and the fan-out reader retries on the next request. +- When a group is permanently lost, operators recover by either restoring the group (history reappears on the next request) or invoking an out-of-band `elastickv-admin reassign-lineage` flow (deferred to Phase 4) that moves the lineage metadata to a healthy group; the design here only guarantees that the UI stays useful during the outage, not that history is automatically relocated. + +### 5.7 Key preview labels + +Raw keys are binary. The UI needs a printable hint per bucket. Strategy: + +1. If all keys in the bucket's `[start, end)` are valid UTF-8 with no control characters, return the common byte prefix truncated to 24 chars. +2. Otherwise, return a hex preview of the common prefix plus `…`. +3. Internal reserved prefixes (`!txn|`, `!dist|*`, `!admin|*`) are labelled explicitly and rendered with a distinct color in the UI, so system traffic is never confused with user traffic. + +## 6. Adapter Summary Without Prometheus + +The existing `monitoring.Registry` observers record into Prometheus counters/histograms — useful for Grafana, but not readable back without pulling in the Prometheus client library. To keep the admin binary and node binary free of that dependency during Phases 0–3: + +- A small sibling struct `monitoring.LiveSummary` is added alongside each observer. It maintains, in parallel with the existing Prometheus writes, an in-process rolling window (10-second buckets, 5-minute history) of request count and latency per adapter and per operation. +- Latency is tracked with a **fixed-bucket log-linear histogram** (256 pre-sized buckets covering 1 µs – 10 s, similar to the Prometheus default schema but owned in-process). Each observation is a single `atomic.AddUint64` on the bucket's counter — no sort, no merge, no locks, predictable nanosecond cost. Quantiles (p50/p95/p99) are interpolated at read time by `GetAdapterSummary`. A t-digest was considered but rejected because its centroid merge cost is not bounded on the hot path and is hard to make concurrent without a lock. +- Count, in-flight, and byte totals are plain `atomic.Uint64`. +- `GetAdapterSummary` reads directly from `LiveSummary`. The Prometheus exposition remains unchanged and untouched. + +This adds roughly a dozen integer fields per tracked operation and avoids both the Prometheus dependency and the need to scrape `/metrics` from within the admin binary. + +## 7. Frontend + +- **Stack**: SvelteKit (static adapter) + TypeScript + Tailwind + ECharts (`heatmap` series). +- **Why Svelte**: smaller bundle (~150 KB gzipped for the full app vs ~350 KB for React + equivalent libs), fewer transitive dependency updates to audit, trivial static build that embeds cleanly with `go:embed`. Selected explicitly to favour maintenance simplicity and deployment size. +- **Layout**: left nav with Overview / Routes / Raft / Adapters / Key Visualizer. +- **Key Visualizer page**: + - X-axis time, Y-axis route buckets, brush-to-zoom on both axes. + - Series switcher (reads / writes / readBytes / writeBytes). + - Range selection opens a drawer with the underlying route list, current leader(s), size, and a link to the Raft group page. For `aggregate=true` rows, the drawer explicitly says the row is a coarsened virtual bucket and lists the constituent routes or the truncated route count. + - Live mode: a WebSocket push appends a new column every `stepSeconds` without refetching history. + - Buckets that miss the ±5% / 95%-CI estimator bound are hatched to signal estimation uncertainty. +- **Build**: `web/` at repo root, `pnpm build` output copied to `cmd/elastickv-admin/dist/`, embedded with `//go:embed dist`. +- **Dev flow**: Vite dev server on `:5173` proxies `/api` and `/stream` to a locally running `cmd/elastickv-admin`. + +## 8. Integration Points + +| File | Change | +|---|---| +| `cmd/elastickv-admin/` (new) | Main, HTTP server, gRPC clients, embedded SPA. | +| `adapter/admin_grpc.go` (new) | Server-side implementation of the `Admin` gRPC service, registered in `main.go`. | +| `proto/admin.proto` (new) | Service definition for `Admin`. | +| `kv/sharded_coordinator.go` | One-line `sampler.Observe(...)` at dispatch entry; `sampler` is `keyviz.Sampler` injected via constructor, nil-safe. Phase 3 also filters `!admin|*` from `maxLatestCommitTS` and rejects user-plane writes (`Put`/`Delete`/transactional mutations) targeting `!admin|*` with `InvalidArgument`, so adapters (gRPC, Redis, DynamoDB, S3) cannot bypass the isolation. | +| `keyviz/` (new) | `Sampler`, adaptive sub-sampler, ring buffer, route-watch subscriber, WAL replay, preview logic, tests. | +| `monitoring/live_summary.go` (new) | Rolling-window adapter counters, hooked into existing observers. | +| `store/lsm_store.go` | Phase 3 widens `isPebbleMetaKey` from exact-match to a prefix check on `!admin|` so `nextScannableUserKey` / `prevScannableUserKey` skip all internal KeyViz records during user-plane scans; adds retention GC (Pebble `CompactionFilter` preferred, time-budgeted maintenance sweep fallback) for expired `!admin|keyviz|*` columns and lineage records. | +| `main.go` | Register token-protected `Admin` gRPC service; wire `keyviz.Sampler` into the coordinator; wire `LiveSummary` into observers; add `--adminTokenFile`, `--adminInsecureNoAuth`, `--keyvizMaxTrackedRoutes`, `--keyvizPersistInterval`, and `--keyvizWALMaxBytes`. | +| `web/` (new) | Svelte SPA source. | + +Phases 0–2 require no Raft or FSM changes. Data-plane protocol adapters only receive the sampler call site and the `LiveSummary` hook that sits next to existing Prometheus writes. Phase 3 does change Raft and FSM paths: split/merge Raft commands carry a leader-picked `proposalID` and the derived `lineageID`, a new `BackfillLineage` leader-only command is added for startup recovery, per-group low-priority Raft proposals persist compacted KeyViz columns, and the store/coordinator read paths are updated to keep `!admin|keyviz|*` metadata out of user scans and timestamp selection. + +## 9. Deployment and Operation + +- The admin binary is not intended to be exposed on the public network in its initial form. Default bind is `127.0.0.1:8080`; browser login and RBAC are deferred, but node-side `Admin` gRPC calls require the shared read-only token from §4. +- Typical operator workflow: `ssh -L 8080:localhost:8080 operator@host` then `elastickv-admin --nodes=host1:50051,host2:50051,host3:50051 --nodeTokenFile=/etc/elastickv/admin.token`, or run the binary on a laptop and point it at any reachable subset of nodes. +- The admin binary is stateless; it can be killed and restarted without coordination. +- CI produces release artifacts for `linux/amd64`, `linux/arm64`, `darwin/arm64`, and `windows/amd64`. + +### 9.1 Cluster-wide fan-out + +Because writes are recorded by Raft leaders and follower-local reads are recorded by the followers that serve them (§5.1), pointing the admin binary at a single node produces a **partial heatmap**. To give operators a complete view by default, the admin binary runs in **fan-out mode**: + +- `--nodes` accepts a comma-separated list of seed addresses. The admin binary calls `GetClusterOverview` on any reachable seed to discover the current full membership (node → gRPC endpoint, plus per-group leader identity). Membership is cached for `--nodesRefreshInterval` (**default 15 s**) so a stampede of concurrent browser requests hits at most one `GetClusterOverview` per interval per seed, while scale-out events are still reflected within seconds. The cache is refreshed lazily on the first request after expiry and invalidated immediately on any per-node `Unavailable` error, so removed or replaced nodes are dropped on the next request instead of waiting for the next tick. +- For each query (`GetKeyVizMatrix`, `GetRouteDetail`, `GetAdapterSummary`), the admin binary issues parallel gRPC calls to every known node and merges results server-side before sending one combined JSON payload to the browser. +- Merging rule for the heatmap: rows are grouped by `bucketID`/`lineageID` and time step. Read samples from multiple nodes are **summed**, because they represent distinct locally served reads. For write samples the authoritative identity is `(raftGroupID, leaderTerm)` — by Raft invariants at most one leader exists per term per group — so the admin binary collapses write samples to **one value per `(bucketID, raftGroupID, leaderTerm, windowStart)`** key. If the same logical key arrives from more than one node (e.g., an ex-leader that has not yet expired its local cache plus a correctly-responding new leader in the same term), the entries are expected to be identical and the merger keeps one; if they differ, the cell is surfaced with `conflict=true` (not silently dropped). Across distinct `leaderTerm` values for the same group and window, values are summed because each term's leader only observed its own term's writes. The admin binary never uses "later timestamp wins" to overwrite a previous leader's complete window with a new leader's partial window. +- Degraded mode: if any node is unreachable, the admin binary returns a partial result with a per-node `{node, ok, error}` status array so the UI can surface "3 of 4 nodes responded" instead of silently hiding ranges. The heatmap hatches rows or time windows whose expected source node failed. +- A single-node mode — pass one address to `--nodes` and the admin binary will fan out to just that node's view. A future `--no-fanout` flag that also suppresses the background membership-discovery RPC is deferred; for now the operator can simulate it by pointing at a single seed and accepting the one-node partial view. + +## 10. Performance Considerations + +- Sampler fast path on a hit: `atomic.Pointer[routeTable].Load`, immutable map lookup by `RouteID`, then `atomic.AddUint64` on the slot's four counter fields. No allocation per call, no mutex acquisition, no global lock. +- The coordinator already holds the `RouteID` at the hook site, so the sampler does not re-resolve. +- The flush goroutine performs in-place `atomic.SwapUint64` per tracked counter; there is no write lock covering `Observe` calls and no retired pointers for late writers to hit. Splits and merges publish a copied immutable route table with child counters before publishing the new `RouteID` (§5.4), so the callback does not race with the hot path. +- API endpoints cap `to − from` at 7 days and `rows` at 1024 to bound server work. +- `LiveSummary` adds a second atomic increment alongside each existing Prometheus `Inc()`, plus one atomic increment on a fixed-bucket histogram counter. Cost is on the order of a nanosecond and well below the noise floor in §5.2. +- Fan-out cost (§9.1) is N parallel gRPC calls; each node serves only its locally observed samples, so the response size is distributed and the aggregate wall-clock is bounded by the slowest node, not the sum. + +## 11. Testing + +1. Unit tests for `keyviz.Sampler`: concurrent `Observe` under the `-race` detector while copy-on-write route-table updates run, flush correctness via the `atomic.SwapUint64` drain protocol (no counts lost across the flush boundary), split/merge reshaping, forwarded-read "already sampled" deduplication, and the **accuracy SLO** (1000 trials of synthetic workload must satisfy ±5% relative error at 95% CI per §5.2). +2. Route-budget test: generate more than `--keyvizMaxTrackedRoutes` routes and assert that coarsening preserves total observed traffic, keeps hot routes un-merged, and returns `aggregate`, `bucketID`, `routeCount`, and constituent route metadata correctly. +3. Integration test in `kv/` that drives synthetic traffic through the coordinator and asserts the matrix reflects the skew. +4. gRPC handler tests with a fake engine and fake Raft status reader. +5. Fan-out test: admin binary against a 3-node fake cluster, including follower-local reads, one unreachable node, and a leadership transfer in the middle of a step window; the merged response must sum non-duplicate samples, preserve the partial-status array, and flag ambiguous overlap. +6. Persistence test: write compacted columns to per-range groups, perform split and merge transitions, restart a node, take a leadership transfer, run KeyViz GC, and verify the lineage reader reconstructs complete history across groups without relying on stable `RouteID`s`. +7. Namespace isolation test: user `ScanAt`, `ReverseScanAt`, and `maxLatestCommitTS` must ignore `!admin|keyviz|*` records, and user-plane `Put` / `Delete` / transactional writes to any `!admin|*` key must be rejected with `InvalidArgument` by every adapter (gRPC `RawKV`/`TransactionalKV`, Redis, DynamoDB, S3). +8. Auth test: `Admin` gRPC methods reject missing or wrong tokens and accept the configured read-only token. +9. Benchmark gate: `BenchmarkCoordinatorDispatch` with sampler off vs on. CI fails if the difference exceeds the benchmark's own run-to-run variance. +10. Playwright smoke test against the embedded SPA to catch build-time regressions. + +## 12. Phased Delivery + +| Phase | Scope | Exit criteria | +|---|---|---| +| 0 | `cmd/elastickv-admin` skeleton, token-protected `Admin` gRPC service stub, empty SPA shell, CI wiring. | Binary builds, `/api/cluster/overview` returns live data from a real node only when the configured admin token is supplied. | +| 1 | Overview, Routes, Raft Groups, Adapters pages. `LiveSummary` added. No sampler. | All read-only pages match `grpcurl` ground truth. | +| 2 | Key Visualizer MVP: in-memory sampler with adaptive sub-sampling, leader writes, leader/follower reads, fan-out across nodes, static matrix API with virtual-bucket metadata. | Benchmark gate green; heatmap shows synthetic hotspot within 2 s of load; ±5% / 95%-CI accuracy SLO holds under synthetic bursts; fan-out returns complete view with 1 node down. | +| 3 | Bytes series, drill-down, split/merge continuity, namespace-isolated persistence of compacted columns distributed **per owning Raft group**, lineage recovery, and retention GC. | Heatmap remains continuous across a live `SplitRange`; restart preserves last 7 days; expired data and stale lineage records are collected; no single Raft group sees more than its share of KeyViz writes. | +| 4 (deferred) | Mutating admin operations (`SplitRange` from UI), browser login, RBAC, and identity-provider integration. Out of scope for this design; a follow-up design will cover it. | — | + +Phases 0–2 are the minimum operationally useful product; Phase 3 is the "ship-quality" target. + +## 13. Open Questions + +1. Default value of `--keyvizMaxTrackedRoutes`. 10 000 is conservative; operators with very large clusters may prefer a higher default paired with shorter retention. Settle during Phase 2 benchmarking. +2. For the Phase 3 persistence schema, should KeyViz writes share a transaction with other per-group low-priority maintenance (compaction metadata, etc.) to amortise Raft cost, or remain a dedicated batch for easier rollback? diff --git a/internal/grpc.go b/internal/grpc.go index 59092615..c3658622 100644 --- a/internal/grpc.go +++ b/internal/grpc.go @@ -27,3 +27,16 @@ func GRPCDialOptions() []grpc.DialOption { ), } } + +// GRPCCallOptions returns the per-call message-size cap dial option used by +// callers that supply their own transport credentials (e.g. the admin +// binary's TLS-aware fanout). Without this, gRPC-Go's default ~4 MiB recv +// cap would silently fail RPCs once aggregated cluster-overview / matrix +// admin payloads exceed 4 MiB even though node servers (GRPCServerOptions) +// are configured for 64 MiB. +func GRPCCallOptions() grpc.DialOption { + return grpc.WithDefaultCallOptions( + grpc.MaxCallRecvMsgSize(GRPCMaxMessageBytes), + grpc.MaxCallSendMsgSize(GRPCMaxMessageBytes), + ) +} diff --git a/internal/raftengine/etcd/fsm_snapshot_file.go b/internal/raftengine/etcd/fsm_snapshot_file.go index 8e0e498f..9ed5825a 100644 --- a/internal/raftengine/etcd/fsm_snapshot_file.go +++ b/internal/raftengine/etcd/fsm_snapshot_file.go @@ -19,6 +19,7 @@ import ( const ( fsmSnapDirName = "fsm-snap" + snapFileExt = ".snap" snapshotTokenSize = 17 // 4 (magic) + 1 (version) + 8 (index) + 4 (crc32c) snapshotTokenVersion = byte(0x01) @@ -135,7 +136,7 @@ func fsmSnapPath(fsmSnapDir string, index uint64) string { // Snap files are named "{term:016x}-{index:016x}.snap". // Returns 0 on parse failure. func parseSnapFileIndex(name string) uint64 { - base := strings.TrimSuffix(name, ".snap") + base := strings.TrimSuffix(name, snapFileExt) idx := strings.LastIndex(base, "-") if idx < 0 { return 0 @@ -554,7 +555,7 @@ func collectLiveSnapIndexes(snapDir string) (map[uint64]bool, error) { } liveIndexes := make(map[uint64]bool, len(snapEntries)) for _, e := range snapEntries { - if !e.IsDir() && filepath.Ext(e.Name()) == ".snap" { + if !e.IsDir() && filepath.Ext(e.Name()) == snapFileExt { if idx := parseSnapFileIndex(e.Name()); idx > 0 { liveIndexes[idx] = true } @@ -644,7 +645,7 @@ func purgeOldSnapshotFiles(snapDir, fsmSnapDir string) error { func collectSnapNames(entries []os.DirEntry) []string { var snaps []string for _, e := range entries { - if !e.IsDir() && filepath.Ext(e.Name()) == ".snap" { + if !e.IsDir() && filepath.Ext(e.Name()) == snapFileExt { snaps = append(snaps, e.Name()) } } diff --git a/internal/tokenfile.go b/internal/tokenfile.go new file mode 100644 index 00000000..c3e5f0b8 --- /dev/null +++ b/internal/tokenfile.go @@ -0,0 +1,57 @@ +package internal + +import ( + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + + "github.com/cockroachdb/errors" +) + +// LoadBearerTokenFile materialises a bearer-token file with a strict upper +// bound on size so a misconfigured path (for example, pointing at a log) +// cannot force an arbitrary allocation before the bearer-token check. +// The file is read through an io.LimitReader bounded to maxBytes+1 so a +// file that grows or is swapped between stat() and read() still cannot +// sneak past the cap. +// +// The returned string has surrounding whitespace trimmed; an empty file (or +// one that is only whitespace) is reported as an error so operators notice +// the misconfiguration immediately. +// +// The humanName is used in error messages to distinguish token files (e.g. +// "admin token" vs "node token"); callers typically pass a fixed string like +// "admin token" or "node token". +func LoadBearerTokenFile(path string, maxBytes int64, humanName string) (string, error) { + if humanName == "" { + humanName = "token" + } + abs, err := filepath.Abs(path) + if err != nil { + return "", errors.Wrapf(err, "resolve %s path", humanName) + } + f, err := os.Open(abs) + if err != nil { + return "", errors.Wrapf(err, "open %s file", humanName) + } + defer func() { + if cerr := f.Close(); cerr != nil { + log.Printf("internal: close %s file %s: %v", humanName, abs, cerr) + } + }() + b, err := io.ReadAll(io.LimitReader(f, maxBytes+1)) + if err != nil { + return "", errors.Wrapf(err, "read %s file", humanName) + } + if int64(len(b)) > maxBytes { + return "", fmt.Errorf("%s file %s exceeds maximum of %d bytes", humanName, abs, maxBytes) + } + tok := strings.TrimSpace(string(b)) + if tok == "" { + return "", fmt.Errorf("%s file %s is empty", humanName, abs) + } + return tok, nil +} diff --git a/internal/tokenfile_test.go b/internal/tokenfile_test.go new file mode 100644 index 00000000..a8a55057 --- /dev/null +++ b/internal/tokenfile_test.go @@ -0,0 +1,59 @@ +package internal + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestLoadBearerTokenFileHappyPath(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "tok") + if err := os.WriteFile(path, []byte("\n s3cret \n"), 0o600); err != nil { + t.Fatal(err) + } + got, err := LoadBearerTokenFile(path, 4<<10, "admin token") + if err != nil { + t.Fatalf("LoadBearerTokenFile: %v", err) + } + if got != "s3cret" { + t.Fatalf("tok = %q, want s3cret", got) + } +} + +func TestLoadBearerTokenFileRejectsEmpty(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "empty") + if err := os.WriteFile(path, []byte(" \n"), 0o600); err != nil { + t.Fatal(err) + } + _, err := LoadBearerTokenFile(path, 4<<10, "admin token") + if err == nil || !strings.Contains(err.Error(), "is empty") { + t.Fatalf("want empty-file error, got %v", err) + } +} + +func TestLoadBearerTokenFileRejectsOversize(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "huge") + const cap_ = 64 + if err := os.WriteFile(path, []byte(strings.Repeat("x", cap_+1)), 0o600); err != nil { + t.Fatal(err) + } + _, err := LoadBearerTokenFile(path, cap_, "admin token") + if err == nil || !strings.Contains(err.Error(), "exceeds maximum") { + t.Fatalf("want oversize error, got %v", err) + } +} + +func TestLoadBearerTokenFileMissingFile(t *testing.T) { + t.Parallel() + _, err := LoadBearerTokenFile("/definitely/not/there", 4<<10, "admin token") + if err == nil { + t.Fatal("expected open-failure error") + } +} diff --git a/main.go b/main.go index 36a2e86c..85b51e13 100644 --- a/main.go +++ b/main.go @@ -101,7 +101,17 @@ var ( raftS3Map = flag.String("raftS3Map", "", "Map of Raft address to S3 address (raftAddr=s3Addr,...)") raftDynamoMap = flag.String("raftDynamoMap", "", "Map of Raft address to DynamoDB address (raftAddr=dynamoAddr,...)") raftSqsMap = flag.String("raftSqsMap", "", "Map of Raft address to SQS address (raftAddr=sqsAddr,...)") - + // Admin gRPC service flags (this PR — wired into the per-group raft + // listeners; consumed by cmd/elastickv-admin via the bearer-token + // gateway). These are independent of the admin HTTP listener flags + // below — both can be enabled simultaneously, and operators can pick + // whichever auth path they need (gRPC bearer token vs. HTTP cookies + + // SigV4 access keys). + adminTokenFile = flag.String("adminTokenFile", "", "Path to a file containing the read-only bearer token required on the Admin gRPC service (leave blank with --adminInsecureNoAuth off to disable the Admin service)") + adminInsecureNoAuth = flag.Bool("adminInsecureNoAuth", false, "Register the Admin gRPC service without bearer-token authentication; development only") + + // Admin HTTP listener flags (PR #545's parallel work merged into + // main; serves the cookie/SigV4-authenticated admin dashboard). adminEnabled = flag.Bool("adminEnabled", false, "Enable the admin HTTP listener") adminListen = flag.String("adminListen", "127.0.0.1:8080", "host:port for the admin HTTP listener (loopback by default)") adminTLSCertFile = flag.String("adminTLSCertFile", "", "PEM-encoded TLS certificate for the admin listener") @@ -116,6 +126,8 @@ var ( adminFullAccessKeys = flag.String("adminFullAccessKeys", "", "Comma-separated SigV4 access keys granted full-access admin role") ) +const adminTokenMaxBytes = 4 << 10 + // memoryPressureExit is set to true by the memwatch OnExceed callback to // signal that the subsequent graceful shutdown was triggered by user-space // OOM avoidance rather than an ordinary SIGTERM. The process exits with a @@ -296,44 +308,16 @@ func run() error { return nil }) - runner := runtimeServerRunner{ - ctx: runCtx, - lc: &lc, - eg: eg, - cancel: cancel, - runtimes: runtimes, - shardStore: shardStore, - coordinate: coordinate, - distServer: distServer, - redisAddress: *redisAddr, - leaderRedis: cfg.leaderRedis, - pubsubRelay: adapter.NewRedisPubSubRelay(), - readTracker: readTracker, - dynamoAddress: *dynamoAddr, - leaderDynamo: cfg.leaderDynamo, - s3Address: *s3Addr, - leaderS3: cfg.leaderS3, - s3Region: *s3Region, - s3CredsFile: *s3CredsFile, - s3PathStyleOnly: *s3PathStyleOnly, - sqsAddress: *sqsAddr, - leaderSQS: cfg.leaderSQS, - sqsRegion: *sqsRegion, - sqsCredsFile: *sqsCredsFile, - metricsAddress: *metricsAddr, - metricsToken: *metricsToken, - pprofAddress: *pprofAddr, - pprofToken: *pprofToken, - metricsRegistry: metricsRegistry, - } - if err := runner.start(); err != nil { + if err := startServers(serversInput{ + ctx: runCtx, eg: eg, cancel: cancel, lc: &lc, + runtimes: runtimes, bootstrapServers: bootstrapServers, + shardStore: shardStore, coordinate: coordinate, + distServer: distServer, readTracker: readTracker, + metricsRegistry: metricsRegistry, cfg: cfg, + }); err != nil { return err } - if err := startAdminFromFlags(runCtx, &lc, eg, runtimes); err != nil { - return waitErrgroupAfterStartupFailure(cancel, eg, err) - } - if err := eg.Wait(); err != nil { return errors.Wrapf(err, "failed to serve") } @@ -638,6 +622,222 @@ func dispatchMonitorSources(runtimes []*raftGroupRuntime) []monitoring.DispatchS return out } +// setupAdminService is a thin wrapper around configureAdminService that also +// binds each Raft runtime to the server and logs an operator warning when +// running without authentication. Keeping this out of run() preserves run's +// cyclomatic-complexity budget. Members are seeded from the bootstrap +// configuration so GetClusterOverview advertises peer node addresses to the +// admin binary's fan-out discovery path. +// serversInput bundles the values run() passes to startServers so the +// signature stays compact and run() stays under the cyclop budget. +type serversInput struct { + ctx context.Context + eg *errgroup.Group + cancel context.CancelFunc + lc *net.ListenConfig + runtimes []*raftGroupRuntime + bootstrapServers []raftengine.Server + shardStore *kv.ShardStore + coordinate kv.Coordinator + distServer *adapter.DistributionServer + readTracker *kv.ActiveTimestampTracker + metricsRegistry *monitoring.Registry + cfg runtimeConfig +} + +// startServers wires up the AdminServer, builds the runtime runner, and +// kicks off both the per-group raft listeners and the admin HTTP listener. +// Extracted from run() to keep cyclomatic complexity within budget. +func startServers(in serversInput) error { + adminServer, adminGRPCOpts, err := setupAdminService(*raftId, *myAddr, in.runtimes, in.bootstrapServers) + if err != nil { + return err + } + runner := runtimeServerRunner{ + ctx: in.ctx, + lc: in.lc, + eg: in.eg, + cancel: in.cancel, + runtimes: in.runtimes, + shardStore: in.shardStore, + coordinate: in.coordinate, + distServer: in.distServer, + adminServer: adminServer, + adminGRPCOpts: adminGRPCOpts, + redisAddress: *redisAddr, + leaderRedis: in.cfg.leaderRedis, + pubsubRelay: adapter.NewRedisPubSubRelay(), + readTracker: in.readTracker, + dynamoAddress: *dynamoAddr, + leaderDynamo: in.cfg.leaderDynamo, + s3Address: *s3Addr, + leaderS3: in.cfg.leaderS3, + s3Region: *s3Region, + s3CredsFile: *s3CredsFile, + s3PathStyleOnly: *s3PathStyleOnly, + sqsAddress: *sqsAddr, + leaderSQS: in.cfg.leaderSQS, + sqsRegion: *sqsRegion, + sqsCredsFile: *sqsCredsFile, + metricsAddress: *metricsAddr, + metricsToken: *metricsToken, + pprofAddress: *pprofAddr, + pprofToken: *pprofToken, + metricsRegistry: in.metricsRegistry, + } + if err := runner.start(); err != nil { + return err + } + if err := startAdminFromFlags(in.ctx, in.lc, in.eg, in.runtimes); err != nil { + return waitErrgroupAfterStartupFailure(in.cancel, in.eg, err) + } + return nil +} + +func setupAdminService( + nodeID, grpcAddress string, + runtimes []*raftGroupRuntime, + bootstrapServers []raftengine.Server, +) (*adapter.AdminServer, adminGRPCInterceptors, error) { + members := adminMembersFromBootstrap(nodeID, bootstrapServers) + // In multi-group mode the process does not listen on *myAddr — each group + // has its own rt.spec.address. Use the lowest-group-ID listener as the + // canonical self address so GetClusterOverview.Self advertises an + // endpoint the fan-out can actually dial. Falls back to the flag value + // when no runtimes are registered (single-node dev runs). + selfAddr := canonicalSelfAddress(grpcAddress, runtimes) + srv, icept, err := configureAdminService( + *adminTokenFile, + *adminInsecureNoAuth, + adapter.NodeIdentity{NodeID: nodeID, GRPCAddress: selfAddr}, + members, + ) + if err != nil { + return nil, adminGRPCInterceptors{}, err + } + if srv == nil { + return nil, adminGRPCInterceptors{}, nil + } + for _, rt := range runtimes { + srv.RegisterGroup(rt.spec.id, rt.engine) + } + if *adminInsecureNoAuth { + log.Printf("WARNING: --adminInsecureNoAuth is set; Admin gRPC service exposed without authentication") + } + return srv, icept, nil +} + +// canonicalSelfAddress picks the listener address AdminServer should advertise +// as Self.GRPCAddress. The Admin gRPC service is registered on every Raft +// group's listener in startRaftServers, so any runtime's address is reachable; +// we pick the lowest group ID to make the choice deterministic across +// restarts. Returns the supplied fallback when no runtimes exist (e.g., a +// single-node dev invocation without --raftGroups). +func canonicalSelfAddress(fallback string, runtimes []*raftGroupRuntime) string { + var ( + bestID uint64 + bestAddr string + found bool + ) + for _, rt := range runtimes { + if rt == nil { + continue + } + if !found || rt.spec.id < bestID { + bestID, bestAddr, found = rt.spec.id, rt.spec.address, true + } + } + if !found { + return fallback + } + return bestAddr +} + +// adminMembersFromBootstrap extracts the peer list (everyone except self) from +// the Raft bootstrap configuration so GetClusterOverview returns a populated +// members list. Without this the admin binary's membersFrom cache collapses to +// only the responding seed and stops fanning out across the cluster. +func adminMembersFromBootstrap(selfID string, servers []raftengine.Server) []adapter.NodeIdentity { + if len(servers) == 0 { + return nil + } + out := make([]adapter.NodeIdentity, 0, len(servers)) + for _, s := range servers { + if s.ID == selfID { + continue + } + out = append(out, adapter.NodeIdentity{ + NodeID: s.ID, + GRPCAddress: s.Address, + }) + } + return out +} + +// adminGRPCInterceptors bundles the unary+stream interceptors that enforce the +// Admin bearer token. Returning the raw interceptor functions (rather than +// pre-wrapped grpc.ServerOption values via grpc.ChainUnaryInterceptor) lets +// the registration site combine them with any other interceptors in a single +// ChainUnaryInterceptor call, so using grpc.UnaryInterceptor alongside risks +// silent overwrites (gRPC-Go: last option of the same type wins). +type adminGRPCInterceptors struct { + unary []grpc.UnaryServerInterceptor + stream []grpc.StreamServerInterceptor +} + +func (a adminGRPCInterceptors) empty() bool { + return len(a.unary) == 0 && len(a.stream) == 0 +} + +// configureAdminService builds the node-side AdminServer plus the interceptor +// set that enforces its bearer token, or returns (nil, {}, nil) when the +// service is intentionally disabled. It is mutually exclusive with +// --adminInsecureNoAuth so operators have to opt into the unauthenticated +// mode explicitly. +func configureAdminService( + tokenPath string, + insecureNoAuth bool, + self adapter.NodeIdentity, + members []adapter.NodeIdentity, +) (*adapter.AdminServer, adminGRPCInterceptors, error) { + if tokenPath == "" && !insecureNoAuth { + return nil, adminGRPCInterceptors{}, nil + } + if tokenPath != "" && insecureNoAuth { + return nil, adminGRPCInterceptors{}, errors.New("--adminInsecureNoAuth and --adminTokenFile are mutually exclusive") + } + token := "" + if tokenPath != "" { + loaded, err := loadAdminTokenFile(tokenPath) + if err != nil { + return nil, adminGRPCInterceptors{}, err + } + token = loaded + } + srv := adapter.NewAdminServer(self, members) + unary, stream := adapter.AdminTokenAuth(token) + var icept adminGRPCInterceptors + if unary != nil { + icept.unary = append(icept.unary, unary) + } + if stream != nil { + icept.stream = append(icept.stream, stream) + } + return srv, icept, nil +} + +// loadAdminTokenFile materialises --adminTokenFile with a strict upper bound +// so a misconfigured path (for example a log file) cannot force an arbitrary +// allocation before the bearer-token check. Delegates to the shared helper in +// internal/ so the admin binary and the node process read tokens identically. +func loadAdminTokenFile(path string) (string, error) { + tok, err := internalutil.LoadBearerTokenFile(path, adminTokenMaxBytes, "admin token") + if err != nil { + return "", errors.Wrap(err, "load admin token") + } + return tok, nil +} + // startMemoryWatchdog optionally starts the memwatch goroutine. The // watcher is off by default; it is enabled only when the operator sets // ELASTICKV_MEMORY_SHUTDOWN_THRESHOLD_MB. On threshold crossing the @@ -733,15 +933,38 @@ func startRaftServers( distServer *adapter.DistributionServer, relay *adapter.RedisPubSubRelay, proposalObserverForGroup func(uint64) kv.ProposalObserver, + adminServer *adapter.AdminServer, + adminGRPCOpts adminGRPCInterceptors, ) error { + // extraOptsCap reserves slots for the unary + stream admin interceptor + // options appended below. Sized as a constant so the magic-number + // linter does not complain. + const extraOptsCap = 2 for _, rt := range runtimes { - gs := grpc.NewServer(internalutil.GRPCServerOptions()...) + baseOpts := internalutil.GRPCServerOptions() + opts := make([]grpc.ServerOption, 0, len(baseOpts)+extraOptsCap) + opts = append(opts, baseOpts...) + // Collapse all interceptors into a single ChainUnaryInterceptor / + // ChainStreamInterceptor call so a future grpc.UnaryInterceptor + // (single-interceptor) option added anywhere in this chain cannot + // silently overwrite the admin auth gate — gRPC-Go keeps only the + // last option of the same type. + if len(adminGRPCOpts.unary) > 0 { + opts = append(opts, grpc.ChainUnaryInterceptor(adminGRPCOpts.unary...)) + } + if len(adminGRPCOpts.stream) > 0 { + opts = append(opts, grpc.ChainStreamInterceptor(adminGRPCOpts.stream...)) + } + gs := grpc.NewServer(opts...) trx := kv.NewTransactionWithProposer(rt.engine, kv.WithProposalObserver(observerForGroup(proposalObserverForGroup, rt.spec.id))) grpcSvc := adapter.NewGRPCServer(shardStore, coordinate) pb.RegisterRawKVServer(gs, grpcSvc) pb.RegisterTransactionalKVServer(gs, grpcSvc) pb.RegisterInternalServer(gs, adapter.NewInternalWithEngine(trx, rt.engine, coordinate.Clock(), relay)) pb.RegisterDistributionServer(gs, distServer) + if adminServer != nil { + pb.RegisterAdminServer(gs, adminServer) + } rt.registerGRPC(gs) internalraftadmin.RegisterOperationalServices(ctx, gs, rt.engine, []string{"RawKV"}) reflection.Register(gs) @@ -962,6 +1185,8 @@ type runtimeServerRunner struct { shardStore *kv.ShardStore coordinate kv.Coordinator distServer *adapter.DistributionServer + adminServer *adapter.AdminServer + adminGRPCOpts adminGRPCInterceptors redisAddress string leaderRedis map[string]string pubsubRelay *adapter.RedisPubSubRelay @@ -1000,6 +1225,8 @@ func (r runtimeServerRunner) start() error { func(groupID uint64) kv.ProposalObserver { return r.metricsRegistry.RaftProposalObserver(groupID) }, + r.adminServer, + r.adminGRPCOpts, ); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } diff --git a/main_admin_test.go b/main_admin_test.go index 13ecd07a..eb961311 100644 --- a/main_admin_test.go +++ b/main_admin_test.go @@ -16,14 +16,141 @@ import ( "net/http" "os" "path/filepath" + "strings" "testing" "time" + "github.com/bootjp/elastickv/adapter" "github.com/bootjp/elastickv/internal/admin" + "github.com/bootjp/elastickv/internal/raftengine" "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" ) +func TestConfigureAdminServiceDisabledByDefault(t *testing.T) { + t.Parallel() + srv, icept, err := configureAdminService("", false, adapter.NodeIdentity{NodeID: "n1"}, nil) + if err != nil { + t.Fatalf("disabled-by-default should not error: %v", err) + } + if srv != nil || !icept.empty() { + t.Fatalf("disabled service should return nil server and empty interceptors; got %v %+v", srv, icept) + } +} + +func TestConfigureAdminServiceRejectsMutualExclusion(t *testing.T) { + t.Parallel() + dir := t.TempDir() + tokPath := filepath.Join(dir, "t") + if err := os.WriteFile(tokPath, []byte("x"), 0o600); err != nil { + t.Fatal(err) + } + if _, _, err := configureAdminService(tokPath, true, adapter.NodeIdentity{}, nil); err == nil { + t.Fatal("expected mutual-exclusion error") + } +} + +func TestConfigureAdminServiceTokenFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + tokPath := filepath.Join(dir, "t") + if err := os.WriteFile(tokPath, []byte("hunter2\n"), 0o600); err != nil { + t.Fatal(err) + } + srv, icept, err := configureAdminService(tokPath, false, adapter.NodeIdentity{NodeID: "n1"}, nil) + if err != nil { + t.Fatalf("configureAdminService: %v", err) + } + if srv == nil { + t.Fatal("expected an AdminServer instance") + } + // Expect one unary + one stream interceptor for the admin-token gate. + if len(icept.unary) != 1 || len(icept.stream) != 1 { + t.Fatalf("expected 1 unary + 1 stream interceptor, got %d + %d", len(icept.unary), len(icept.stream)) + } +} + +func TestConfigureAdminServiceInsecureNoAuth(t *testing.T) { + t.Parallel() + srv, icept, err := configureAdminService("", true, adapter.NodeIdentity{NodeID: "n1"}, nil) + if err != nil { + t.Fatalf("insecure mode should succeed: %v", err) + } + if srv == nil { + t.Fatal("expected AdminServer in insecure mode") + } + if !icept.empty() { + t.Fatalf("insecure mode should not attach interceptors, got %+v", icept) + } +} + +func TestAdminMembersFromBootstrapExcludesSelf(t *testing.T) { + t.Parallel() + servers := []raftengine.Server{ + {ID: "n1", Address: "10.0.0.11:50051"}, + {ID: "n2", Address: "10.0.0.12:50051"}, + {ID: "n3", Address: "10.0.0.13:50051"}, + } + got := adminMembersFromBootstrap("n1", servers) + if len(got) != 2 { + t.Fatalf("len = %d, want 2 (self excluded)", len(got)) + } + want := map[string]string{"n2": "10.0.0.12:50051", "n3": "10.0.0.13:50051"} + for _, m := range got { + if want[m.NodeID] != m.GRPCAddress { + t.Fatalf("member %+v not in expected set %v", m, want) + } + } +} + +func TestAdminMembersFromBootstrapEmpty(t *testing.T) { + t.Parallel() + if got := adminMembersFromBootstrap("n1", nil); got != nil { + t.Fatalf("empty bootstrap should produce nil, got %v", got) + } + single := []raftengine.Server{{ID: "n1", Address: "a:1"}} + if got := adminMembersFromBootstrap("n1", single); len(got) != 0 { + t.Fatalf("single-node bootstrap should yield no members, got %v", got) + } +} + +// TestCanonicalSelfAddressPicksLowestGroup pins the deterministic choice of +// Self.GRPCAddress when --raftGroups is set — the fan-out path has to dial an +// endpoint that this process actually listens on, so --address (which may be +// unrelated) must not win over the real group listeners. +func TestCanonicalSelfAddressPicksLowestGroup(t *testing.T) { + t.Parallel() + runtimes := []*raftGroupRuntime{ + {spec: groupSpec{id: 5, address: "10.0.0.1:50055"}}, + {spec: groupSpec{id: 2, address: "10.0.0.1:50052"}}, + {spec: groupSpec{id: 9, address: "10.0.0.1:50059"}}, + } + got := canonicalSelfAddress("localhost:50051", runtimes) + if got != "10.0.0.1:50052" { + t.Fatalf("got %q, want lowest-group address 10.0.0.1:50052", got) + } +} + +func TestCanonicalSelfAddressFallsBackWithoutRuntimes(t *testing.T) { + t.Parallel() + got := canonicalSelfAddress("localhost:50051", nil) + if got != "localhost:50051" { + t.Fatalf("got %q, want fallback localhost:50051", got) + } +} + +func TestLoadAdminTokenFileRejectsOversize(t *testing.T) { + t.Parallel() + dir := t.TempDir() + path := filepath.Join(dir, "huge") + if err := os.WriteFile(path, []byte(strings.Repeat("x", adminTokenMaxBytes+1)), 0o600); err != nil { + t.Fatal(err) + } + if _, err := loadAdminTokenFile(path); err == nil || !strings.Contains(err.Error(), "exceeds maximum") { + t.Fatalf("expected size-cap error, got %v", err) + } +} + func freshKey() string { raw := make([]byte, 64) // Deterministic seed is fine; tests only care it is the right length. diff --git a/proto/Makefile b/proto/Makefile index c329a70b..8f811e88 100644 --- a/proto/Makefile +++ b/proto/Makefile @@ -30,6 +30,9 @@ gen: check-tools protoc --go_out=. --go_opt=paths=source_relative \ --go-grpc_out=. --go-grpc_opt=paths=source_relative \ distribution.proto + protoc --go_out=. --go_opt=paths=source_relative \ + --go-grpc_out=. --go-grpc_opt=paths=source_relative \ + admin.proto protoc --go_out=. --go_opt=paths=source_relative \ dynamodb_internal.proto protoc --go_out=. --go_opt=paths=source_relative \ diff --git a/proto/admin.pb.go b/proto/admin.pb.go new file mode 100644 index 00000000..5b0d08e6 --- /dev/null +++ b/proto/admin.pb.go @@ -0,0 +1,1533 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.11 +// protoc v7.34.0 +// source: admin.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type KeyVizSeries int32 + +const ( + KeyVizSeries_KEYVIZ_SERIES_UNSPECIFIED KeyVizSeries = 0 + KeyVizSeries_KEYVIZ_SERIES_READS KeyVizSeries = 1 + KeyVizSeries_KEYVIZ_SERIES_WRITES KeyVizSeries = 2 + KeyVizSeries_KEYVIZ_SERIES_READ_BYTES KeyVizSeries = 3 + KeyVizSeries_KEYVIZ_SERIES_WRITE_BYTES KeyVizSeries = 4 +) + +// Enum value maps for KeyVizSeries. +var ( + KeyVizSeries_name = map[int32]string{ + 0: "KEYVIZ_SERIES_UNSPECIFIED", + 1: "KEYVIZ_SERIES_READS", + 2: "KEYVIZ_SERIES_WRITES", + 3: "KEYVIZ_SERIES_READ_BYTES", + 4: "KEYVIZ_SERIES_WRITE_BYTES", + } + KeyVizSeries_value = map[string]int32{ + "KEYVIZ_SERIES_UNSPECIFIED": 0, + "KEYVIZ_SERIES_READS": 1, + "KEYVIZ_SERIES_WRITES": 2, + "KEYVIZ_SERIES_READ_BYTES": 3, + "KEYVIZ_SERIES_WRITE_BYTES": 4, + } +) + +func (x KeyVizSeries) Enum() *KeyVizSeries { + p := new(KeyVizSeries) + *p = x + return p +} + +func (x KeyVizSeries) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (KeyVizSeries) Descriptor() protoreflect.EnumDescriptor { + return file_admin_proto_enumTypes[0].Descriptor() +} + +func (KeyVizSeries) Type() protoreflect.EnumType { + return &file_admin_proto_enumTypes[0] +} + +func (x KeyVizSeries) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use KeyVizSeries.Descriptor instead. +func (KeyVizSeries) EnumDescriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{0} +} + +type SampleRole int32 + +const ( + SampleRole_SAMPLE_ROLE_UNSPECIFIED SampleRole = 0 + SampleRole_SAMPLE_ROLE_LEADER_WRITE SampleRole = 1 + SampleRole_SAMPLE_ROLE_LEADER_READ SampleRole = 2 + SampleRole_SAMPLE_ROLE_FOLLOWER_READ SampleRole = 3 +) + +// Enum value maps for SampleRole. +var ( + SampleRole_name = map[int32]string{ + 0: "SAMPLE_ROLE_UNSPECIFIED", + 1: "SAMPLE_ROLE_LEADER_WRITE", + 2: "SAMPLE_ROLE_LEADER_READ", + 3: "SAMPLE_ROLE_FOLLOWER_READ", + } + SampleRole_value = map[string]int32{ + "SAMPLE_ROLE_UNSPECIFIED": 0, + "SAMPLE_ROLE_LEADER_WRITE": 1, + "SAMPLE_ROLE_LEADER_READ": 2, + "SAMPLE_ROLE_FOLLOWER_READ": 3, + } +) + +func (x SampleRole) Enum() *SampleRole { + p := new(SampleRole) + *p = x + return p +} + +func (x SampleRole) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SampleRole) Descriptor() protoreflect.EnumDescriptor { + return file_admin_proto_enumTypes[1].Descriptor() +} + +func (SampleRole) Type() protoreflect.EnumType { + return &file_admin_proto_enumTypes[1] +} + +func (x SampleRole) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SampleRole.Descriptor instead. +func (SampleRole) EnumDescriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{1} +} + +type NodeIdentity struct { + state protoimpl.MessageState `protogen:"open.v1"` + NodeId string `protobuf:"bytes,1,opt,name=node_id,json=nodeId,proto3" json:"node_id,omitempty"` + GrpcAddress string `protobuf:"bytes,2,opt,name=grpc_address,json=grpcAddress,proto3" json:"grpc_address,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *NodeIdentity) Reset() { + *x = NodeIdentity{} + mi := &file_admin_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *NodeIdentity) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*NodeIdentity) ProtoMessage() {} + +func (x *NodeIdentity) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NodeIdentity.ProtoReflect.Descriptor instead. +func (*NodeIdentity) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{0} +} + +func (x *NodeIdentity) GetNodeId() string { + if x != nil { + return x.NodeId + } + return "" +} + +func (x *NodeIdentity) GetGrpcAddress() string { + if x != nil { + return x.GrpcAddress + } + return "" +} + +type GroupLeader struct { + state protoimpl.MessageState `protogen:"open.v1"` + RaftGroupId uint64 `protobuf:"varint,1,opt,name=raft_group_id,json=raftGroupId,proto3" json:"raft_group_id,omitempty"` + LeaderNodeId string `protobuf:"bytes,2,opt,name=leader_node_id,json=leaderNodeId,proto3" json:"leader_node_id,omitempty"` + LeaderTerm uint64 `protobuf:"varint,3,opt,name=leader_term,json=leaderTerm,proto3" json:"leader_term,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GroupLeader) Reset() { + *x = GroupLeader{} + mi := &file_admin_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GroupLeader) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GroupLeader) ProtoMessage() {} + +func (x *GroupLeader) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GroupLeader.ProtoReflect.Descriptor instead. +func (*GroupLeader) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{1} +} + +func (x *GroupLeader) GetRaftGroupId() uint64 { + if x != nil { + return x.RaftGroupId + } + return 0 +} + +func (x *GroupLeader) GetLeaderNodeId() string { + if x != nil { + return x.LeaderNodeId + } + return "" +} + +func (x *GroupLeader) GetLeaderTerm() uint64 { + if x != nil { + return x.LeaderTerm + } + return 0 +} + +type GetClusterOverviewRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetClusterOverviewRequest) Reset() { + *x = GetClusterOverviewRequest{} + mi := &file_admin_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetClusterOverviewRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetClusterOverviewRequest) ProtoMessage() {} + +func (x *GetClusterOverviewRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetClusterOverviewRequest.ProtoReflect.Descriptor instead. +func (*GetClusterOverviewRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{2} +} + +type GetClusterOverviewResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Self *NodeIdentity `protobuf:"bytes,1,opt,name=self,proto3" json:"self,omitempty"` + Members []*NodeIdentity `protobuf:"bytes,2,rep,name=members,proto3" json:"members,omitempty"` + GroupLeaders []*GroupLeader `protobuf:"bytes,3,rep,name=group_leaders,json=groupLeaders,proto3" json:"group_leaders,omitempty"` + AggregateQps uint64 `protobuf:"varint,4,opt,name=aggregate_qps,json=aggregateQps,proto3" json:"aggregate_qps,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetClusterOverviewResponse) Reset() { + *x = GetClusterOverviewResponse{} + mi := &file_admin_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetClusterOverviewResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetClusterOverviewResponse) ProtoMessage() {} + +func (x *GetClusterOverviewResponse) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetClusterOverviewResponse.ProtoReflect.Descriptor instead. +func (*GetClusterOverviewResponse) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{3} +} + +func (x *GetClusterOverviewResponse) GetSelf() *NodeIdentity { + if x != nil { + return x.Self + } + return nil +} + +func (x *GetClusterOverviewResponse) GetMembers() []*NodeIdentity { + if x != nil { + return x.Members + } + return nil +} + +func (x *GetClusterOverviewResponse) GetGroupLeaders() []*GroupLeader { + if x != nil { + return x.GroupLeaders + } + return nil +} + +func (x *GetClusterOverviewResponse) GetAggregateQps() uint64 { + if x != nil { + return x.AggregateQps + } + return 0 +} + +type RaftGroupState struct { + state protoimpl.MessageState `protogen:"open.v1"` + RaftGroupId uint64 `protobuf:"varint,1,opt,name=raft_group_id,json=raftGroupId,proto3" json:"raft_group_id,omitempty"` + LeaderNodeId string `protobuf:"bytes,2,opt,name=leader_node_id,json=leaderNodeId,proto3" json:"leader_node_id,omitempty"` + LeaderTerm uint64 `protobuf:"varint,3,opt,name=leader_term,json=leaderTerm,proto3" json:"leader_term,omitempty"` + CommitIndex uint64 `protobuf:"varint,4,opt,name=commit_index,json=commitIndex,proto3" json:"commit_index,omitempty"` + AppliedIndex uint64 `protobuf:"varint,5,opt,name=applied_index,json=appliedIndex,proto3" json:"applied_index,omitempty"` + // last_contact_unix_ms is the unix-ms timestamp of the most recent leader + // contact observed on this node. Zero means "unknown" (for example, the + // engine is a follower that has never heard from a leader); UIs should + // render that case as "unknown" rather than "contacted at epoch". + LastContactUnixMs int64 `protobuf:"varint,6,opt,name=last_contact_unix_ms,json=lastContactUnixMs,proto3" json:"last_contact_unix_ms,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *RaftGroupState) Reset() { + *x = RaftGroupState{} + mi := &file_admin_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *RaftGroupState) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RaftGroupState) ProtoMessage() {} + +func (x *RaftGroupState) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RaftGroupState.ProtoReflect.Descriptor instead. +func (*RaftGroupState) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{4} +} + +func (x *RaftGroupState) GetRaftGroupId() uint64 { + if x != nil { + return x.RaftGroupId + } + return 0 +} + +func (x *RaftGroupState) GetLeaderNodeId() string { + if x != nil { + return x.LeaderNodeId + } + return "" +} + +func (x *RaftGroupState) GetLeaderTerm() uint64 { + if x != nil { + return x.LeaderTerm + } + return 0 +} + +func (x *RaftGroupState) GetCommitIndex() uint64 { + if x != nil { + return x.CommitIndex + } + return 0 +} + +func (x *RaftGroupState) GetAppliedIndex() uint64 { + if x != nil { + return x.AppliedIndex + } + return 0 +} + +func (x *RaftGroupState) GetLastContactUnixMs() int64 { + if x != nil { + return x.LastContactUnixMs + } + return 0 +} + +type GetRaftGroupsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetRaftGroupsRequest) Reset() { + *x = GetRaftGroupsRequest{} + mi := &file_admin_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetRaftGroupsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetRaftGroupsRequest) ProtoMessage() {} + +func (x *GetRaftGroupsRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetRaftGroupsRequest.ProtoReflect.Descriptor instead. +func (*GetRaftGroupsRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{5} +} + +type GetRaftGroupsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Groups []*RaftGroupState `protobuf:"bytes,1,rep,name=groups,proto3" json:"groups,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetRaftGroupsResponse) Reset() { + *x = GetRaftGroupsResponse{} + mi := &file_admin_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetRaftGroupsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetRaftGroupsResponse) ProtoMessage() {} + +func (x *GetRaftGroupsResponse) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetRaftGroupsResponse.ProtoReflect.Descriptor instead. +func (*GetRaftGroupsResponse) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{6} +} + +func (x *GetRaftGroupsResponse) GetGroups() []*RaftGroupState { + if x != nil { + return x.Groups + } + return nil +} + +type AdapterSummary struct { + state protoimpl.MessageState `protogen:"open.v1"` + Adapter string `protobuf:"bytes,1,opt,name=adapter,proto3" json:"adapter,omitempty"` + Operation string `protobuf:"bytes,2,opt,name=operation,proto3" json:"operation,omitempty"` + Requests uint64 `protobuf:"varint,3,opt,name=requests,proto3" json:"requests,omitempty"` + InFlight uint64 `protobuf:"varint,4,opt,name=in_flight,json=inFlight,proto3" json:"in_flight,omitempty"` + BytesIn uint64 `protobuf:"varint,5,opt,name=bytes_in,json=bytesIn,proto3" json:"bytes_in,omitempty"` + BytesOut uint64 `protobuf:"varint,6,opt,name=bytes_out,json=bytesOut,proto3" json:"bytes_out,omitempty"` + P50Ns float64 `protobuf:"fixed64,7,opt,name=p50_ns,json=p50Ns,proto3" json:"p50_ns,omitempty"` + P95Ns float64 `protobuf:"fixed64,8,opt,name=p95_ns,json=p95Ns,proto3" json:"p95_ns,omitempty"` + P99Ns float64 `protobuf:"fixed64,9,opt,name=p99_ns,json=p99Ns,proto3" json:"p99_ns,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AdapterSummary) Reset() { + *x = AdapterSummary{} + mi := &file_admin_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AdapterSummary) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AdapterSummary) ProtoMessage() {} + +func (x *AdapterSummary) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AdapterSummary.ProtoReflect.Descriptor instead. +func (*AdapterSummary) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{7} +} + +func (x *AdapterSummary) GetAdapter() string { + if x != nil { + return x.Adapter + } + return "" +} + +func (x *AdapterSummary) GetOperation() string { + if x != nil { + return x.Operation + } + return "" +} + +func (x *AdapterSummary) GetRequests() uint64 { + if x != nil { + return x.Requests + } + return 0 +} + +func (x *AdapterSummary) GetInFlight() uint64 { + if x != nil { + return x.InFlight + } + return 0 +} + +func (x *AdapterSummary) GetBytesIn() uint64 { + if x != nil { + return x.BytesIn + } + return 0 +} + +func (x *AdapterSummary) GetBytesOut() uint64 { + if x != nil { + return x.BytesOut + } + return 0 +} + +func (x *AdapterSummary) GetP50Ns() float64 { + if x != nil { + return x.P50Ns + } + return 0 +} + +func (x *AdapterSummary) GetP95Ns() float64 { + if x != nil { + return x.P95Ns + } + return 0 +} + +func (x *AdapterSummary) GetP99Ns() float64 { + if x != nil { + return x.P99Ns + } + return 0 +} + +type GetAdapterSummaryRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetAdapterSummaryRequest) Reset() { + *x = GetAdapterSummaryRequest{} + mi := &file_admin_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetAdapterSummaryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetAdapterSummaryRequest) ProtoMessage() {} + +func (x *GetAdapterSummaryRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetAdapterSummaryRequest.ProtoReflect.Descriptor instead. +func (*GetAdapterSummaryRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{8} +} + +type GetAdapterSummaryResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Summaries []*AdapterSummary `protobuf:"bytes,1,rep,name=summaries,proto3" json:"summaries,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetAdapterSummaryResponse) Reset() { + *x = GetAdapterSummaryResponse{} + mi := &file_admin_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetAdapterSummaryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetAdapterSummaryResponse) ProtoMessage() {} + +func (x *GetAdapterSummaryResponse) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[9] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetAdapterSummaryResponse.ProtoReflect.Descriptor instead. +func (*GetAdapterSummaryResponse) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{9} +} + +func (x *GetAdapterSummaryResponse) GetSummaries() []*AdapterSummary { + if x != nil { + return x.Summaries + } + return nil +} + +type KeyVizRow struct { + state protoimpl.MessageState `protogen:"open.v1"` + // bucket_id is either "route:" or "virtual:". + BucketId string `protobuf:"bytes,1,opt,name=bucket_id,json=bucketId,proto3" json:"bucket_id,omitempty"` + Start []byte `protobuf:"bytes,2,opt,name=start,proto3" json:"start,omitempty"` + End []byte `protobuf:"bytes,3,opt,name=end,proto3" json:"end,omitempty"` + Label string `protobuf:"bytes,4,opt,name=label,proto3" json:"label,omitempty"` + Aggregate bool `protobuf:"varint,5,opt,name=aggregate,proto3" json:"aggregate,omitempty"` + RouteIds []uint64 `protobuf:"varint,6,rep,packed,name=route_ids,json=routeIds,proto3" json:"route_ids,omitempty"` + RouteIdsTruncated bool `protobuf:"varint,7,opt,name=route_ids_truncated,json=routeIdsTruncated,proto3" json:"route_ids_truncated,omitempty"` + RouteCount uint64 `protobuf:"varint,8,opt,name=route_count,json=routeCount,proto3" json:"route_count,omitempty"` + SampleRoles []SampleRole `protobuf:"varint,9,rep,packed,name=sample_roles,json=sampleRoles,proto3,enum=SampleRole" json:"sample_roles,omitempty"` + LineageId string `protobuf:"bytes,10,opt,name=lineage_id,json=lineageId,proto3" json:"lineage_id,omitempty"` + // values[j] is the series value at time column j. + Values []uint64 `protobuf:"varint,11,rep,packed,name=values,proto3" json:"values,omitempty"` + // soft_columns[j] is true when the j-th column missed the estimator SLO. + SoftColumns []bool `protobuf:"varint,12,rep,packed,name=soft_columns,json=softColumns,proto3" json:"soft_columns,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *KeyVizRow) Reset() { + *x = KeyVizRow{} + mi := &file_admin_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *KeyVizRow) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*KeyVizRow) ProtoMessage() {} + +func (x *KeyVizRow) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[10] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use KeyVizRow.ProtoReflect.Descriptor instead. +func (*KeyVizRow) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{10} +} + +func (x *KeyVizRow) GetBucketId() string { + if x != nil { + return x.BucketId + } + return "" +} + +func (x *KeyVizRow) GetStart() []byte { + if x != nil { + return x.Start + } + return nil +} + +func (x *KeyVizRow) GetEnd() []byte { + if x != nil { + return x.End + } + return nil +} + +func (x *KeyVizRow) GetLabel() string { + if x != nil { + return x.Label + } + return "" +} + +func (x *KeyVizRow) GetAggregate() bool { + if x != nil { + return x.Aggregate + } + return false +} + +func (x *KeyVizRow) GetRouteIds() []uint64 { + if x != nil { + return x.RouteIds + } + return nil +} + +func (x *KeyVizRow) GetRouteIdsTruncated() bool { + if x != nil { + return x.RouteIdsTruncated + } + return false +} + +func (x *KeyVizRow) GetRouteCount() uint64 { + if x != nil { + return x.RouteCount + } + return 0 +} + +func (x *KeyVizRow) GetSampleRoles() []SampleRole { + if x != nil { + return x.SampleRoles + } + return nil +} + +func (x *KeyVizRow) GetLineageId() string { + if x != nil { + return x.LineageId + } + return "" +} + +func (x *KeyVizRow) GetValues() []uint64 { + if x != nil { + return x.Values + } + return nil +} + +func (x *KeyVizRow) GetSoftColumns() []bool { + if x != nil { + return x.SoftColumns + } + return nil +} + +type GetKeyVizMatrixRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Series KeyVizSeries `protobuf:"varint,1,opt,name=series,proto3,enum=KeyVizSeries" json:"series,omitempty"` + FromUnixMs int64 `protobuf:"varint,2,opt,name=from_unix_ms,json=fromUnixMs,proto3" json:"from_unix_ms,omitempty"` + ToUnixMs int64 `protobuf:"varint,3,opt,name=to_unix_ms,json=toUnixMs,proto3" json:"to_unix_ms,omitempty"` + Rows uint32 `protobuf:"varint,4,opt,name=rows,proto3" json:"rows,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetKeyVizMatrixRequest) Reset() { + *x = GetKeyVizMatrixRequest{} + mi := &file_admin_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetKeyVizMatrixRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetKeyVizMatrixRequest) ProtoMessage() {} + +func (x *GetKeyVizMatrixRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[11] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetKeyVizMatrixRequest.ProtoReflect.Descriptor instead. +func (*GetKeyVizMatrixRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{11} +} + +func (x *GetKeyVizMatrixRequest) GetSeries() KeyVizSeries { + if x != nil { + return x.Series + } + return KeyVizSeries_KEYVIZ_SERIES_UNSPECIFIED +} + +func (x *GetKeyVizMatrixRequest) GetFromUnixMs() int64 { + if x != nil { + return x.FromUnixMs + } + return 0 +} + +func (x *GetKeyVizMatrixRequest) GetToUnixMs() int64 { + if x != nil { + return x.ToUnixMs + } + return 0 +} + +func (x *GetKeyVizMatrixRequest) GetRows() uint32 { + if x != nil { + return x.Rows + } + return 0 +} + +type GetKeyVizMatrixResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + ColumnUnixMs []int64 `protobuf:"varint,1,rep,packed,name=column_unix_ms,json=columnUnixMs,proto3" json:"column_unix_ms,omitempty"` + Rows []*KeyVizRow `protobuf:"bytes,2,rep,name=rows,proto3" json:"rows,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetKeyVizMatrixResponse) Reset() { + *x = GetKeyVizMatrixResponse{} + mi := &file_admin_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetKeyVizMatrixResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetKeyVizMatrixResponse) ProtoMessage() {} + +func (x *GetKeyVizMatrixResponse) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[12] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetKeyVizMatrixResponse.ProtoReflect.Descriptor instead. +func (*GetKeyVizMatrixResponse) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{12} +} + +func (x *GetKeyVizMatrixResponse) GetColumnUnixMs() []int64 { + if x != nil { + return x.ColumnUnixMs + } + return nil +} + +func (x *GetKeyVizMatrixResponse) GetRows() []*KeyVizRow { + if x != nil { + return x.Rows + } + return nil +} + +type GetRouteDetailRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Either a concrete route: or a virtual: emitted in a previous + // GetKeyVizMatrix response. + BucketId string `protobuf:"bytes,1,opt,name=bucket_id,json=bucketId,proto3" json:"bucket_id,omitempty"` + FromUnixMs int64 `protobuf:"varint,2,opt,name=from_unix_ms,json=fromUnixMs,proto3" json:"from_unix_ms,omitempty"` + ToUnixMs int64 `protobuf:"varint,3,opt,name=to_unix_ms,json=toUnixMs,proto3" json:"to_unix_ms,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetRouteDetailRequest) Reset() { + *x = GetRouteDetailRequest{} + mi := &file_admin_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetRouteDetailRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetRouteDetailRequest) ProtoMessage() {} + +func (x *GetRouteDetailRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[13] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetRouteDetailRequest.ProtoReflect.Descriptor instead. +func (*GetRouteDetailRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{13} +} + +func (x *GetRouteDetailRequest) GetBucketId() string { + if x != nil { + return x.BucketId + } + return "" +} + +func (x *GetRouteDetailRequest) GetFromUnixMs() int64 { + if x != nil { + return x.FromUnixMs + } + return 0 +} + +func (x *GetRouteDetailRequest) GetToUnixMs() int64 { + if x != nil { + return x.ToUnixMs + } + return 0 +} + +type GetRouteDetailResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Row *KeyVizRow `protobuf:"bytes,1,opt,name=row,proto3" json:"row,omitempty"` + PerAdapter []*AdapterSummary `protobuf:"bytes,2,rep,name=per_adapter,json=perAdapter,proto3" json:"per_adapter,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetRouteDetailResponse) Reset() { + *x = GetRouteDetailResponse{} + mi := &file_admin_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetRouteDetailResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetRouteDetailResponse) ProtoMessage() {} + +func (x *GetRouteDetailResponse) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[14] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetRouteDetailResponse.ProtoReflect.Descriptor instead. +func (*GetRouteDetailResponse) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{14} +} + +func (x *GetRouteDetailResponse) GetRow() *KeyVizRow { + if x != nil { + return x.Row + } + return nil +} + +func (x *GetRouteDetailResponse) GetPerAdapter() []*AdapterSummary { + if x != nil { + return x.PerAdapter + } + return nil +} + +type StreamEventsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StreamEventsRequest) Reset() { + *x = StreamEventsRequest{} + mi := &file_admin_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StreamEventsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StreamEventsRequest) ProtoMessage() {} + +func (x *StreamEventsRequest) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[15] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StreamEventsRequest.ProtoReflect.Descriptor instead. +func (*StreamEventsRequest) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{15} +} + +type StreamEventsEvent struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Event: + // + // *StreamEventsEvent_RouteTransition + // *StreamEventsEvent_KeyvizColumn + Event isStreamEventsEvent_Event `protobuf_oneof:"event"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StreamEventsEvent) Reset() { + *x = StreamEventsEvent{} + mi := &file_admin_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StreamEventsEvent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StreamEventsEvent) ProtoMessage() {} + +func (x *StreamEventsEvent) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[16] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StreamEventsEvent.ProtoReflect.Descriptor instead. +func (*StreamEventsEvent) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{16} +} + +func (x *StreamEventsEvent) GetEvent() isStreamEventsEvent_Event { + if x != nil { + return x.Event + } + return nil +} + +func (x *StreamEventsEvent) GetRouteTransition() *RouteTransition { + if x != nil { + if x, ok := x.Event.(*StreamEventsEvent_RouteTransition); ok { + return x.RouteTransition + } + } + return nil +} + +func (x *StreamEventsEvent) GetKeyvizColumn() *KeyVizColumn { + if x != nil { + if x, ok := x.Event.(*StreamEventsEvent_KeyvizColumn); ok { + return x.KeyvizColumn + } + } + return nil +} + +type isStreamEventsEvent_Event interface { + isStreamEventsEvent_Event() +} + +type StreamEventsEvent_RouteTransition struct { + RouteTransition *RouteTransition `protobuf:"bytes,1,opt,name=route_transition,json=routeTransition,proto3,oneof"` +} + +type StreamEventsEvent_KeyvizColumn struct { + KeyvizColumn *KeyVizColumn `protobuf:"bytes,2,opt,name=keyviz_column,json=keyvizColumn,proto3,oneof"` +} + +func (*StreamEventsEvent_RouteTransition) isStreamEventsEvent_Event() {} + +func (*StreamEventsEvent_KeyvizColumn) isStreamEventsEvent_Event() {} + +type RouteTransition struct { + state protoimpl.MessageState `protogen:"open.v1"` + ParentRouteId uint64 `protobuf:"varint,1,opt,name=parent_route_id,json=parentRouteId,proto3" json:"parent_route_id,omitempty"` + ChildRouteIds []uint64 `protobuf:"varint,2,rep,packed,name=child_route_ids,json=childRouteIds,proto3" json:"child_route_ids,omitempty"` + LineageId string `protobuf:"bytes,3,opt,name=lineage_id,json=lineageId,proto3" json:"lineage_id,omitempty"` + UnixMs int64 `protobuf:"varint,4,opt,name=unix_ms,json=unixMs,proto3" json:"unix_ms,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *RouteTransition) Reset() { + *x = RouteTransition{} + mi := &file_admin_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *RouteTransition) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RouteTransition) ProtoMessage() {} + +func (x *RouteTransition) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[17] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RouteTransition.ProtoReflect.Descriptor instead. +func (*RouteTransition) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{17} +} + +func (x *RouteTransition) GetParentRouteId() uint64 { + if x != nil { + return x.ParentRouteId + } + return 0 +} + +func (x *RouteTransition) GetChildRouteIds() []uint64 { + if x != nil { + return x.ChildRouteIds + } + return nil +} + +func (x *RouteTransition) GetLineageId() string { + if x != nil { + return x.LineageId + } + return "" +} + +func (x *RouteTransition) GetUnixMs() int64 { + if x != nil { + return x.UnixMs + } + return 0 +} + +type KeyVizColumn struct { + state protoimpl.MessageState `protogen:"open.v1"` + ColumnUnixMs int64 `protobuf:"varint,1,opt,name=column_unix_ms,json=columnUnixMs,proto3" json:"column_unix_ms,omitempty"` + Series KeyVizSeries `protobuf:"varint,2,opt,name=series,proto3,enum=KeyVizSeries" json:"series,omitempty"` + Rows []*KeyVizRow `protobuf:"bytes,3,rep,name=rows,proto3" json:"rows,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *KeyVizColumn) Reset() { + *x = KeyVizColumn{} + mi := &file_admin_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *KeyVizColumn) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*KeyVizColumn) ProtoMessage() {} + +func (x *KeyVizColumn) ProtoReflect() protoreflect.Message { + mi := &file_admin_proto_msgTypes[18] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use KeyVizColumn.ProtoReflect.Descriptor instead. +func (*KeyVizColumn) Descriptor() ([]byte, []int) { + return file_admin_proto_rawDescGZIP(), []int{18} +} + +func (x *KeyVizColumn) GetColumnUnixMs() int64 { + if x != nil { + return x.ColumnUnixMs + } + return 0 +} + +func (x *KeyVizColumn) GetSeries() KeyVizSeries { + if x != nil { + return x.Series + } + return KeyVizSeries_KEYVIZ_SERIES_UNSPECIFIED +} + +func (x *KeyVizColumn) GetRows() []*KeyVizRow { + if x != nil { + return x.Rows + } + return nil +} + +var File_admin_proto protoreflect.FileDescriptor + +const file_admin_proto_rawDesc = "" + + "\n" + + "\vadmin.proto\"J\n" + + "\fNodeIdentity\x12\x17\n" + + "\anode_id\x18\x01 \x01(\tR\x06nodeId\x12!\n" + + "\fgrpc_address\x18\x02 \x01(\tR\vgrpcAddress\"x\n" + + "\vGroupLeader\x12\"\n" + + "\rraft_group_id\x18\x01 \x01(\x04R\vraftGroupId\x12$\n" + + "\x0eleader_node_id\x18\x02 \x01(\tR\fleaderNodeId\x12\x1f\n" + + "\vleader_term\x18\x03 \x01(\x04R\n" + + "leaderTerm\"\x1b\n" + + "\x19GetClusterOverviewRequest\"\xc0\x01\n" + + "\x1aGetClusterOverviewResponse\x12!\n" + + "\x04self\x18\x01 \x01(\v2\r.NodeIdentityR\x04self\x12'\n" + + "\amembers\x18\x02 \x03(\v2\r.NodeIdentityR\amembers\x121\n" + + "\rgroup_leaders\x18\x03 \x03(\v2\f.GroupLeaderR\fgroupLeaders\x12#\n" + + "\raggregate_qps\x18\x04 \x01(\x04R\faggregateQps\"\xf4\x01\n" + + "\x0eRaftGroupState\x12\"\n" + + "\rraft_group_id\x18\x01 \x01(\x04R\vraftGroupId\x12$\n" + + "\x0eleader_node_id\x18\x02 \x01(\tR\fleaderNodeId\x12\x1f\n" + + "\vleader_term\x18\x03 \x01(\x04R\n" + + "leaderTerm\x12!\n" + + "\fcommit_index\x18\x04 \x01(\x04R\vcommitIndex\x12#\n" + + "\rapplied_index\x18\x05 \x01(\x04R\fappliedIndex\x12/\n" + + "\x14last_contact_unix_ms\x18\x06 \x01(\x03R\x11lastContactUnixMs\"\x16\n" + + "\x14GetRaftGroupsRequest\"@\n" + + "\x15GetRaftGroupsResponse\x12'\n" + + "\x06groups\x18\x01 \x03(\v2\x0f.RaftGroupStateR\x06groups\"\xfe\x01\n" + + "\x0eAdapterSummary\x12\x18\n" + + "\aadapter\x18\x01 \x01(\tR\aadapter\x12\x1c\n" + + "\toperation\x18\x02 \x01(\tR\toperation\x12\x1a\n" + + "\brequests\x18\x03 \x01(\x04R\brequests\x12\x1b\n" + + "\tin_flight\x18\x04 \x01(\x04R\binFlight\x12\x19\n" + + "\bbytes_in\x18\x05 \x01(\x04R\abytesIn\x12\x1b\n" + + "\tbytes_out\x18\x06 \x01(\x04R\bbytesOut\x12\x15\n" + + "\x06p50_ns\x18\a \x01(\x01R\x05p50Ns\x12\x15\n" + + "\x06p95_ns\x18\b \x01(\x01R\x05p95Ns\x12\x15\n" + + "\x06p99_ns\x18\t \x01(\x01R\x05p99Ns\"\x1a\n" + + "\x18GetAdapterSummaryRequest\"J\n" + + "\x19GetAdapterSummaryResponse\x12-\n" + + "\tsummaries\x18\x01 \x03(\v2\x0f.AdapterSummaryR\tsummaries\"\xfc\x02\n" + + "\tKeyVizRow\x12\x1b\n" + + "\tbucket_id\x18\x01 \x01(\tR\bbucketId\x12\x14\n" + + "\x05start\x18\x02 \x01(\fR\x05start\x12\x10\n" + + "\x03end\x18\x03 \x01(\fR\x03end\x12\x14\n" + + "\x05label\x18\x04 \x01(\tR\x05label\x12\x1c\n" + + "\taggregate\x18\x05 \x01(\bR\taggregate\x12\x1b\n" + + "\troute_ids\x18\x06 \x03(\x04R\brouteIds\x12.\n" + + "\x13route_ids_truncated\x18\a \x01(\bR\x11routeIdsTruncated\x12\x1f\n" + + "\vroute_count\x18\b \x01(\x04R\n" + + "routeCount\x12.\n" + + "\fsample_roles\x18\t \x03(\x0e2\v.SampleRoleR\vsampleRoles\x12\x1d\n" + + "\n" + + "lineage_id\x18\n" + + " \x01(\tR\tlineageId\x12\x16\n" + + "\x06values\x18\v \x03(\x04R\x06values\x12!\n" + + "\fsoft_columns\x18\f \x03(\bR\vsoftColumns\"\x93\x01\n" + + "\x16GetKeyVizMatrixRequest\x12%\n" + + "\x06series\x18\x01 \x01(\x0e2\r.KeyVizSeriesR\x06series\x12 \n" + + "\ffrom_unix_ms\x18\x02 \x01(\x03R\n" + + "fromUnixMs\x12\x1c\n" + + "\n" + + "to_unix_ms\x18\x03 \x01(\x03R\btoUnixMs\x12\x12\n" + + "\x04rows\x18\x04 \x01(\rR\x04rows\"_\n" + + "\x17GetKeyVizMatrixResponse\x12$\n" + + "\x0ecolumn_unix_ms\x18\x01 \x03(\x03R\fcolumnUnixMs\x12\x1e\n" + + "\x04rows\x18\x02 \x03(\v2\n" + + ".KeyVizRowR\x04rows\"t\n" + + "\x15GetRouteDetailRequest\x12\x1b\n" + + "\tbucket_id\x18\x01 \x01(\tR\bbucketId\x12 \n" + + "\ffrom_unix_ms\x18\x02 \x01(\x03R\n" + + "fromUnixMs\x12\x1c\n" + + "\n" + + "to_unix_ms\x18\x03 \x01(\x03R\btoUnixMs\"h\n" + + "\x16GetRouteDetailResponse\x12\x1c\n" + + "\x03row\x18\x01 \x01(\v2\n" + + ".KeyVizRowR\x03row\x120\n" + + "\vper_adapter\x18\x02 \x03(\v2\x0f.AdapterSummaryR\n" + + "perAdapter\"\x15\n" + + "\x13StreamEventsRequest\"\x91\x01\n" + + "\x11StreamEventsEvent\x12=\n" + + "\x10route_transition\x18\x01 \x01(\v2\x10.RouteTransitionH\x00R\x0frouteTransition\x124\n" + + "\rkeyviz_column\x18\x02 \x01(\v2\r.KeyVizColumnH\x00R\fkeyvizColumnB\a\n" + + "\x05event\"\x99\x01\n" + + "\x0fRouteTransition\x12&\n" + + "\x0fparent_route_id\x18\x01 \x01(\x04R\rparentRouteId\x12&\n" + + "\x0fchild_route_ids\x18\x02 \x03(\x04R\rchildRouteIds\x12\x1d\n" + + "\n" + + "lineage_id\x18\x03 \x01(\tR\tlineageId\x12\x17\n" + + "\aunix_ms\x18\x04 \x01(\x03R\x06unixMs\"{\n" + + "\fKeyVizColumn\x12$\n" + + "\x0ecolumn_unix_ms\x18\x01 \x01(\x03R\fcolumnUnixMs\x12%\n" + + "\x06series\x18\x02 \x01(\x0e2\r.KeyVizSeriesR\x06series\x12\x1e\n" + + "\x04rows\x18\x03 \x03(\v2\n" + + ".KeyVizRowR\x04rows*\x9d\x01\n" + + "\fKeyVizSeries\x12\x1d\n" + + "\x19KEYVIZ_SERIES_UNSPECIFIED\x10\x00\x12\x17\n" + + "\x13KEYVIZ_SERIES_READS\x10\x01\x12\x18\n" + + "\x14KEYVIZ_SERIES_WRITES\x10\x02\x12\x1c\n" + + "\x18KEYVIZ_SERIES_READ_BYTES\x10\x03\x12\x1d\n" + + "\x19KEYVIZ_SERIES_WRITE_BYTES\x10\x04*\x83\x01\n" + + "\n" + + "SampleRole\x12\x1b\n" + + "\x17SAMPLE_ROLE_UNSPECIFIED\x10\x00\x12\x1c\n" + + "\x18SAMPLE_ROLE_LEADER_WRITE\x10\x01\x12\x1b\n" + + "\x17SAMPLE_ROLE_LEADER_READ\x10\x02\x12\x1d\n" + + "\x19SAMPLE_ROLE_FOLLOWER_READ\x10\x032\xb3\x03\n" + + "\x05Admin\x12O\n" + + "\x12GetClusterOverview\x12\x1a.GetClusterOverviewRequest\x1a\x1b.GetClusterOverviewResponse\"\x00\x12@\n" + + "\rGetRaftGroups\x12\x15.GetRaftGroupsRequest\x1a\x16.GetRaftGroupsResponse\"\x00\x12L\n" + + "\x11GetAdapterSummary\x12\x19.GetAdapterSummaryRequest\x1a\x1a.GetAdapterSummaryResponse\"\x00\x12F\n" + + "\x0fGetKeyVizMatrix\x12\x17.GetKeyVizMatrixRequest\x1a\x18.GetKeyVizMatrixResponse\"\x00\x12C\n" + + "\x0eGetRouteDetail\x12\x16.GetRouteDetailRequest\x1a\x17.GetRouteDetailResponse\"\x00\x12<\n" + + "\fStreamEvents\x12\x14.StreamEventsRequest\x1a\x12.StreamEventsEvent\"\x000\x01B#Z!github.com/bootjp/elastickv/protob\x06proto3" + +var ( + file_admin_proto_rawDescOnce sync.Once + file_admin_proto_rawDescData []byte +) + +func file_admin_proto_rawDescGZIP() []byte { + file_admin_proto_rawDescOnce.Do(func() { + file_admin_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_admin_proto_rawDesc), len(file_admin_proto_rawDesc))) + }) + return file_admin_proto_rawDescData +} + +var file_admin_proto_enumTypes = make([]protoimpl.EnumInfo, 2) +var file_admin_proto_msgTypes = make([]protoimpl.MessageInfo, 19) +var file_admin_proto_goTypes = []any{ + (KeyVizSeries)(0), // 0: KeyVizSeries + (SampleRole)(0), // 1: SampleRole + (*NodeIdentity)(nil), // 2: NodeIdentity + (*GroupLeader)(nil), // 3: GroupLeader + (*GetClusterOverviewRequest)(nil), // 4: GetClusterOverviewRequest + (*GetClusterOverviewResponse)(nil), // 5: GetClusterOverviewResponse + (*RaftGroupState)(nil), // 6: RaftGroupState + (*GetRaftGroupsRequest)(nil), // 7: GetRaftGroupsRequest + (*GetRaftGroupsResponse)(nil), // 8: GetRaftGroupsResponse + (*AdapterSummary)(nil), // 9: AdapterSummary + (*GetAdapterSummaryRequest)(nil), // 10: GetAdapterSummaryRequest + (*GetAdapterSummaryResponse)(nil), // 11: GetAdapterSummaryResponse + (*KeyVizRow)(nil), // 12: KeyVizRow + (*GetKeyVizMatrixRequest)(nil), // 13: GetKeyVizMatrixRequest + (*GetKeyVizMatrixResponse)(nil), // 14: GetKeyVizMatrixResponse + (*GetRouteDetailRequest)(nil), // 15: GetRouteDetailRequest + (*GetRouteDetailResponse)(nil), // 16: GetRouteDetailResponse + (*StreamEventsRequest)(nil), // 17: StreamEventsRequest + (*StreamEventsEvent)(nil), // 18: StreamEventsEvent + (*RouteTransition)(nil), // 19: RouteTransition + (*KeyVizColumn)(nil), // 20: KeyVizColumn +} +var file_admin_proto_depIdxs = []int32{ + 2, // 0: GetClusterOverviewResponse.self:type_name -> NodeIdentity + 2, // 1: GetClusterOverviewResponse.members:type_name -> NodeIdentity + 3, // 2: GetClusterOverviewResponse.group_leaders:type_name -> GroupLeader + 6, // 3: GetRaftGroupsResponse.groups:type_name -> RaftGroupState + 9, // 4: GetAdapterSummaryResponse.summaries:type_name -> AdapterSummary + 1, // 5: KeyVizRow.sample_roles:type_name -> SampleRole + 0, // 6: GetKeyVizMatrixRequest.series:type_name -> KeyVizSeries + 12, // 7: GetKeyVizMatrixResponse.rows:type_name -> KeyVizRow + 12, // 8: GetRouteDetailResponse.row:type_name -> KeyVizRow + 9, // 9: GetRouteDetailResponse.per_adapter:type_name -> AdapterSummary + 19, // 10: StreamEventsEvent.route_transition:type_name -> RouteTransition + 20, // 11: StreamEventsEvent.keyviz_column:type_name -> KeyVizColumn + 0, // 12: KeyVizColumn.series:type_name -> KeyVizSeries + 12, // 13: KeyVizColumn.rows:type_name -> KeyVizRow + 4, // 14: Admin.GetClusterOverview:input_type -> GetClusterOverviewRequest + 7, // 15: Admin.GetRaftGroups:input_type -> GetRaftGroupsRequest + 10, // 16: Admin.GetAdapterSummary:input_type -> GetAdapterSummaryRequest + 13, // 17: Admin.GetKeyVizMatrix:input_type -> GetKeyVizMatrixRequest + 15, // 18: Admin.GetRouteDetail:input_type -> GetRouteDetailRequest + 17, // 19: Admin.StreamEvents:input_type -> StreamEventsRequest + 5, // 20: Admin.GetClusterOverview:output_type -> GetClusterOverviewResponse + 8, // 21: Admin.GetRaftGroups:output_type -> GetRaftGroupsResponse + 11, // 22: Admin.GetAdapterSummary:output_type -> GetAdapterSummaryResponse + 14, // 23: Admin.GetKeyVizMatrix:output_type -> GetKeyVizMatrixResponse + 16, // 24: Admin.GetRouteDetail:output_type -> GetRouteDetailResponse + 18, // 25: Admin.StreamEvents:output_type -> StreamEventsEvent + 20, // [20:26] is the sub-list for method output_type + 14, // [14:20] is the sub-list for method input_type + 14, // [14:14] is the sub-list for extension type_name + 14, // [14:14] is the sub-list for extension extendee + 0, // [0:14] is the sub-list for field type_name +} + +func init() { file_admin_proto_init() } +func file_admin_proto_init() { + if File_admin_proto != nil { + return + } + file_admin_proto_msgTypes[16].OneofWrappers = []any{ + (*StreamEventsEvent_RouteTransition)(nil), + (*StreamEventsEvent_KeyvizColumn)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_admin_proto_rawDesc), len(file_admin_proto_rawDesc)), + NumEnums: 2, + NumMessages: 19, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_admin_proto_goTypes, + DependencyIndexes: file_admin_proto_depIdxs, + EnumInfos: file_admin_proto_enumTypes, + MessageInfos: file_admin_proto_msgTypes, + }.Build() + File_admin_proto = out.File + file_admin_proto_goTypes = nil + file_admin_proto_depIdxs = nil +} diff --git a/proto/admin.proto b/proto/admin.proto new file mode 100644 index 00000000..d0e61ede --- /dev/null +++ b/proto/admin.proto @@ -0,0 +1,153 @@ +syntax = "proto3"; + +option go_package = "github.com/bootjp/elastickv/proto"; + +// Admin is the node-side read-only admin gRPC service consumed by +// cmd/elastickv-admin. Every method requires "authorization: Bearer " +// metadata unless the node was started with --adminInsecureNoAuth. +// See docs/admin_ui_key_visualizer_design.md §4 (Layer A). +service Admin { + rpc GetClusterOverview (GetClusterOverviewRequest) returns (GetClusterOverviewResponse) {} + rpc GetRaftGroups (GetRaftGroupsRequest) returns (GetRaftGroupsResponse) {} + rpc GetAdapterSummary (GetAdapterSummaryRequest) returns (GetAdapterSummaryResponse) {} + rpc GetKeyVizMatrix (GetKeyVizMatrixRequest) returns (GetKeyVizMatrixResponse) {} + rpc GetRouteDetail (GetRouteDetailRequest) returns (GetRouteDetailResponse) {} + rpc StreamEvents (StreamEventsRequest) returns (stream StreamEventsEvent) {} +} + +message NodeIdentity { + string node_id = 1; + string grpc_address = 2; +} + +message GroupLeader { + uint64 raft_group_id = 1; + string leader_node_id = 2; + uint64 leader_term = 3; +} + +message GetClusterOverviewRequest {} + +message GetClusterOverviewResponse { + NodeIdentity self = 1; + repeated NodeIdentity members = 2; + repeated GroupLeader group_leaders = 3; + uint64 aggregate_qps = 4; +} + +message RaftGroupState { + uint64 raft_group_id = 1; + string leader_node_id = 2; + uint64 leader_term = 3; + uint64 commit_index = 4; + uint64 applied_index = 5; + // last_contact_unix_ms is the unix-ms timestamp of the most recent leader + // contact observed on this node. Zero means "unknown" (for example, the + // engine is a follower that has never heard from a leader); UIs should + // render that case as "unknown" rather than "contacted at epoch". + int64 last_contact_unix_ms = 6; +} + +message GetRaftGroupsRequest {} + +message GetRaftGroupsResponse { + repeated RaftGroupState groups = 1; +} + +message AdapterSummary { + string adapter = 1; + string operation = 2; + uint64 requests = 3; + uint64 in_flight = 4; + uint64 bytes_in = 5; + uint64 bytes_out = 6; + double p50_ns = 7; + double p95_ns = 8; + double p99_ns = 9; +} + +message GetAdapterSummaryRequest {} + +message GetAdapterSummaryResponse { + repeated AdapterSummary summaries = 1; +} + +enum KeyVizSeries { + KEYVIZ_SERIES_UNSPECIFIED = 0; + KEYVIZ_SERIES_READS = 1; + KEYVIZ_SERIES_WRITES = 2; + KEYVIZ_SERIES_READ_BYTES = 3; + KEYVIZ_SERIES_WRITE_BYTES = 4; +} + +enum SampleRole { + SAMPLE_ROLE_UNSPECIFIED = 0; + SAMPLE_ROLE_LEADER_WRITE = 1; + SAMPLE_ROLE_LEADER_READ = 2; + SAMPLE_ROLE_FOLLOWER_READ = 3; +} + +message KeyVizRow { + // bucket_id is either "route:" or "virtual:". + string bucket_id = 1; + bytes start = 2; + bytes end = 3; + string label = 4; + bool aggregate = 5; + repeated uint64 route_ids = 6; + bool route_ids_truncated = 7; + uint64 route_count = 8; + repeated SampleRole sample_roles = 9; + string lineage_id = 10; + // values[j] is the series value at time column j. + repeated uint64 values = 11; + // soft_columns[j] is true when the j-th column missed the estimator SLO. + repeated bool soft_columns = 12; +} + +message GetKeyVizMatrixRequest { + KeyVizSeries series = 1; + int64 from_unix_ms = 2; + int64 to_unix_ms = 3; + uint32 rows = 4; +} + +message GetKeyVizMatrixResponse { + repeated int64 column_unix_ms = 1; + repeated KeyVizRow rows = 2; +} + +message GetRouteDetailRequest { + // Either a concrete route: or a virtual: emitted in a previous + // GetKeyVizMatrix response. + string bucket_id = 1; + int64 from_unix_ms = 2; + int64 to_unix_ms = 3; +} + +message GetRouteDetailResponse { + KeyVizRow row = 1; + repeated AdapterSummary per_adapter = 2; +} + +message StreamEventsRequest {} + +message StreamEventsEvent { + oneof event { + RouteTransition route_transition = 1; + KeyVizColumn keyviz_column = 2; + } +} + +message RouteTransition { + uint64 parent_route_id = 1; + repeated uint64 child_route_ids = 2; + string lineage_id = 3; + int64 unix_ms = 4; +} + +message KeyVizColumn { + int64 column_unix_ms = 1; + KeyVizSeries series = 2; + repeated KeyVizRow rows = 3; +} diff --git a/proto/admin_grpc.pb.go b/proto/admin_grpc.pb.go new file mode 100644 index 00000000..379805d3 --- /dev/null +++ b/proto/admin_grpc.pb.go @@ -0,0 +1,325 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.6.1 +// - protoc v7.34.0 +// source: admin.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + Admin_GetClusterOverview_FullMethodName = "/Admin/GetClusterOverview" + Admin_GetRaftGroups_FullMethodName = "/Admin/GetRaftGroups" + Admin_GetAdapterSummary_FullMethodName = "/Admin/GetAdapterSummary" + Admin_GetKeyVizMatrix_FullMethodName = "/Admin/GetKeyVizMatrix" + Admin_GetRouteDetail_FullMethodName = "/Admin/GetRouteDetail" + Admin_StreamEvents_FullMethodName = "/Admin/StreamEvents" +) + +// AdminClient is the client API for Admin service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +// +// Admin is the node-side read-only admin gRPC service consumed by +// cmd/elastickv-admin. Every method requires "authorization: Bearer " +// metadata unless the node was started with --adminInsecureNoAuth. +// See docs/admin_ui_key_visualizer_design.md §4 (Layer A). +type AdminClient interface { + GetClusterOverview(ctx context.Context, in *GetClusterOverviewRequest, opts ...grpc.CallOption) (*GetClusterOverviewResponse, error) + GetRaftGroups(ctx context.Context, in *GetRaftGroupsRequest, opts ...grpc.CallOption) (*GetRaftGroupsResponse, error) + GetAdapterSummary(ctx context.Context, in *GetAdapterSummaryRequest, opts ...grpc.CallOption) (*GetAdapterSummaryResponse, error) + GetKeyVizMatrix(ctx context.Context, in *GetKeyVizMatrixRequest, opts ...grpc.CallOption) (*GetKeyVizMatrixResponse, error) + GetRouteDetail(ctx context.Context, in *GetRouteDetailRequest, opts ...grpc.CallOption) (*GetRouteDetailResponse, error) + StreamEvents(ctx context.Context, in *StreamEventsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[StreamEventsEvent], error) +} + +type adminClient struct { + cc grpc.ClientConnInterface +} + +func NewAdminClient(cc grpc.ClientConnInterface) AdminClient { + return &adminClient{cc} +} + +func (c *adminClient) GetClusterOverview(ctx context.Context, in *GetClusterOverviewRequest, opts ...grpc.CallOption) (*GetClusterOverviewResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetClusterOverviewResponse) + err := c.cc.Invoke(ctx, Admin_GetClusterOverview_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *adminClient) GetRaftGroups(ctx context.Context, in *GetRaftGroupsRequest, opts ...grpc.CallOption) (*GetRaftGroupsResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetRaftGroupsResponse) + err := c.cc.Invoke(ctx, Admin_GetRaftGroups_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *adminClient) GetAdapterSummary(ctx context.Context, in *GetAdapterSummaryRequest, opts ...grpc.CallOption) (*GetAdapterSummaryResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetAdapterSummaryResponse) + err := c.cc.Invoke(ctx, Admin_GetAdapterSummary_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *adminClient) GetKeyVizMatrix(ctx context.Context, in *GetKeyVizMatrixRequest, opts ...grpc.CallOption) (*GetKeyVizMatrixResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetKeyVizMatrixResponse) + err := c.cc.Invoke(ctx, Admin_GetKeyVizMatrix_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *adminClient) GetRouteDetail(ctx context.Context, in *GetRouteDetailRequest, opts ...grpc.CallOption) (*GetRouteDetailResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(GetRouteDetailResponse) + err := c.cc.Invoke(ctx, Admin_GetRouteDetail_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *adminClient) StreamEvents(ctx context.Context, in *StreamEventsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[StreamEventsEvent], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &Admin_ServiceDesc.Streams[0], Admin_StreamEvents_FullMethodName, cOpts...) + if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[StreamEventsRequest, StreamEventsEvent]{ClientStream: stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type Admin_StreamEventsClient = grpc.ServerStreamingClient[StreamEventsEvent] + +// AdminServer is the server API for Admin service. +// All implementations must embed UnimplementedAdminServer +// for forward compatibility. +// +// Admin is the node-side read-only admin gRPC service consumed by +// cmd/elastickv-admin. Every method requires "authorization: Bearer " +// metadata unless the node was started with --adminInsecureNoAuth. +// See docs/admin_ui_key_visualizer_design.md §4 (Layer A). +type AdminServer interface { + GetClusterOverview(context.Context, *GetClusterOverviewRequest) (*GetClusterOverviewResponse, error) + GetRaftGroups(context.Context, *GetRaftGroupsRequest) (*GetRaftGroupsResponse, error) + GetAdapterSummary(context.Context, *GetAdapterSummaryRequest) (*GetAdapterSummaryResponse, error) + GetKeyVizMatrix(context.Context, *GetKeyVizMatrixRequest) (*GetKeyVizMatrixResponse, error) + GetRouteDetail(context.Context, *GetRouteDetailRequest) (*GetRouteDetailResponse, error) + StreamEvents(*StreamEventsRequest, grpc.ServerStreamingServer[StreamEventsEvent]) error + mustEmbedUnimplementedAdminServer() +} + +// UnimplementedAdminServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedAdminServer struct{} + +func (UnimplementedAdminServer) GetClusterOverview(context.Context, *GetClusterOverviewRequest) (*GetClusterOverviewResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetClusterOverview not implemented") +} +func (UnimplementedAdminServer) GetRaftGroups(context.Context, *GetRaftGroupsRequest) (*GetRaftGroupsResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetRaftGroups not implemented") +} +func (UnimplementedAdminServer) GetAdapterSummary(context.Context, *GetAdapterSummaryRequest) (*GetAdapterSummaryResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetAdapterSummary not implemented") +} +func (UnimplementedAdminServer) GetKeyVizMatrix(context.Context, *GetKeyVizMatrixRequest) (*GetKeyVizMatrixResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetKeyVizMatrix not implemented") +} +func (UnimplementedAdminServer) GetRouteDetail(context.Context, *GetRouteDetailRequest) (*GetRouteDetailResponse, error) { + return nil, status.Error(codes.Unimplemented, "method GetRouteDetail not implemented") +} +func (UnimplementedAdminServer) StreamEvents(*StreamEventsRequest, grpc.ServerStreamingServer[StreamEventsEvent]) error { + return status.Error(codes.Unimplemented, "method StreamEvents not implemented") +} +func (UnimplementedAdminServer) mustEmbedUnimplementedAdminServer() {} +func (UnimplementedAdminServer) testEmbeddedByValue() {} + +// UnsafeAdminServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to AdminServer will +// result in compilation errors. +type UnsafeAdminServer interface { + mustEmbedUnimplementedAdminServer() +} + +func RegisterAdminServer(s grpc.ServiceRegistrar, srv AdminServer) { + // If the following call panics, it indicates UnimplementedAdminServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&Admin_ServiceDesc, srv) +} + +func _Admin_GetClusterOverview_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetClusterOverviewRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AdminServer).GetClusterOverview(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Admin_GetClusterOverview_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AdminServer).GetClusterOverview(ctx, req.(*GetClusterOverviewRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Admin_GetRaftGroups_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetRaftGroupsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AdminServer).GetRaftGroups(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Admin_GetRaftGroups_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AdminServer).GetRaftGroups(ctx, req.(*GetRaftGroupsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Admin_GetAdapterSummary_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetAdapterSummaryRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AdminServer).GetAdapterSummary(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Admin_GetAdapterSummary_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AdminServer).GetAdapterSummary(ctx, req.(*GetAdapterSummaryRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Admin_GetKeyVizMatrix_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetKeyVizMatrixRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AdminServer).GetKeyVizMatrix(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Admin_GetKeyVizMatrix_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AdminServer).GetKeyVizMatrix(ctx, req.(*GetKeyVizMatrixRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Admin_GetRouteDetail_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetRouteDetailRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AdminServer).GetRouteDetail(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Admin_GetRouteDetail_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AdminServer).GetRouteDetail(ctx, req.(*GetRouteDetailRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Admin_StreamEvents_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(StreamEventsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(AdminServer).StreamEvents(m, &grpc.GenericServerStream[StreamEventsRequest, StreamEventsEvent]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type Admin_StreamEventsServer = grpc.ServerStreamingServer[StreamEventsEvent] + +// Admin_ServiceDesc is the grpc.ServiceDesc for Admin service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var Admin_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "Admin", + HandlerType: (*AdminServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetClusterOverview", + Handler: _Admin_GetClusterOverview_Handler, + }, + { + MethodName: "GetRaftGroups", + Handler: _Admin_GetRaftGroups_Handler, + }, + { + MethodName: "GetAdapterSummary", + Handler: _Admin_GetAdapterSummary_Handler, + }, + { + MethodName: "GetKeyVizMatrix", + Handler: _Admin_GetKeyVizMatrix_Handler, + }, + { + MethodName: "GetRouteDetail", + Handler: _Admin_GetRouteDetail_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "StreamEvents", + Handler: _Admin_StreamEvents_Handler, + ServerStreams: true, + }, + }, + Metadata: "admin.proto", +}