From 41193ce71a897e894bdbb73aed567a9bc9d14384 Mon Sep 17 00:00:00 2001 From: jupblb Date: Wed, 3 Jun 2026 17:40:05 +0200 Subject: [PATCH 1/3] Share document accumulator between Java/Kotlin plugins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts SemanticdbDocumentBuilder into semanticdb-shared so both plugins use one accumulator that enforces a single output policy: - exact-duplicate occurrences and SymbolInformations are suppressed - occurrences are sorted by (startLine, startCharacter) before assembly - the assembled document is stamped with SEMANTICDB4 + caller language Pre-refactor, javac's accumulator was a raw ArrayList (no dedup, no sort), while kotlinc's accumulator did both via List.contains() / a post-hoc sortWith(). Standardizing on the kotlin policy means future plugins inherit it for free and downstream tools see consistent output. Tests, javac snapshots, and kotlinc snapshots all still pass byte-for-byte — javac's traversal already produced source-ordered, non-duplicate output, so the new policy is a no-op on the existing corpus. 
--- .../semanticdb_javac/SemanticdbVisitor.java | 24 +++---- .../SemanticdbTextDocumentBuilder.kt | 26 ++----- .../semanticdb/SemanticdbDocumentBuilder.java | 71 +++++++++++++++++++ 3 files changed, 87 insertions(+), 34 deletions(-) create mode 100644 semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java index bc0f233a..4de9289a 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java @@ -2,6 +2,7 @@ import com.sourcegraph.semanticdb.Semanticdb; +import com.sourcegraph.semanticdb.SemanticdbDocumentBuilder; import com.sourcegraph.semanticdb.SemanticdbPaths; import com.sourcegraph.semanticdb.SemanticdbSymbols; @@ -69,8 +70,7 @@ public class SemanticdbVisitor extends TreePathScanner { private final CompilationUnitTree compUnitTree; private final Elements elements; private final SemanticdbJavacOptions options; - private final ArrayList occurrences; - private final ArrayList symbolInfos; + private final SemanticdbDocumentBuilder documentBuilder = new SemanticdbDocumentBuilder(); private String source; private String uri; @@ -90,8 +90,6 @@ public SemanticdbVisitor( this.elements = elements; this.trees = trees; this.compUnitTree = compUnitTree; - this.occurrences = new ArrayList<>(); - this.symbolInfos = new ArrayList<>(); this.source = semanticdbText(); this.uri = semanticdbUri(compUnitTree, options); this.nodes = new LinkedHashMap<>(); @@ -102,15 +100,11 @@ public Semanticdb.TextDocument buildTextDocument(CompilationUnitTree tree) { resolveNodes(); - return Semanticdb.TextDocument.newBuilder() - .setSchema(Semanticdb.Schema.SEMANTICDB4) - .setLanguage(Semanticdb.Language.JAVA) - .setUri(uri) - 
.setText(options.includeText ? this.source : "") - .setMd5(semanticdbMd5()) - .addAllOccurrences(occurrences) - .addAllSymbols(symbolInfos) - .build(); + return documentBuilder.build( + Semanticdb.Language.JAVA, + uri, + options.includeText ? this.source : "", + semanticdbMd5()); } private Optional emitSymbolOccurrence( @@ -135,7 +129,7 @@ private void emitSymbolOccurrence( if (sym == null) return; Optional occ = semanticdbOccurrence(sym, range, role, enclosingRange); - occ.ifPresent(occurrences::add); + occ.ifPresent(documentBuilder::addOccurrence); } private void emitSymbolInformation(Element sym, Tree tree) { @@ -201,7 +195,7 @@ private void emitSymbolInformation(Element sym, Tree tree) { Semanticdb.SymbolInformation info = builder.build(); - symbolInfos.add(info); + documentBuilder.addSymbol(info); } void resolveNodes() { diff --git a/semanticdb-kotlinc/src/main/kotlin/com/sourcegraph/semanticdb_kotlinc/SemanticdbTextDocumentBuilder.kt b/semanticdb-kotlinc/src/main/kotlin/com/sourcegraph/semanticdb_kotlinc/SemanticdbTextDocumentBuilder.kt index 06219322..c9a348b6 100644 --- a/semanticdb-kotlinc/src/main/kotlin/com/sourcegraph/semanticdb_kotlinc/SemanticdbTextDocumentBuilder.kt +++ b/semanticdb-kotlinc/src/main/kotlin/com/sourcegraph/semanticdb_kotlinc/SemanticdbTextDocumentBuilder.kt @@ -3,6 +3,7 @@ package com.sourcegraph.semanticdb_kotlinc import com.sourcegraph.semanticdb.Semanticdb import com.sourcegraph.semanticdb.Semanticdb.SymbolOccurrence.Role +import com.sourcegraph.semanticdb.SemanticdbDocumentBuilder import com.sourcegraph.semanticdb.SemanticdbPaths import java.nio.file.Path import java.nio.file.Paths @@ -33,21 +34,12 @@ class SemanticdbTextDocumentBuilder( private val lineMap: LineMap, private val cache: SymbolsCache, ) { - private val occurrences = mutableListOf() - private val symbols = mutableListOf() + private val documentBuilder = SemanticdbDocumentBuilder() private val fileText = file.getContentsAsStream().reader().readText() private val 
semanticMd5 = semanticdbMD5() - fun build() = TextDocument { - this.text = fileText - this.uri = semanticdbURI() - this.md5 = semanticMd5 - this.schema = Semanticdb.Schema.SEMANTICDB4 - this.language = Semanticdb.Language.KOTLIN - occurrences.sortWith(compareBy({ it.range.startLine }, { it.range.startCharacter })) - this.addAllOccurrences(occurrences) - this.addAllSymbols(symbols) - } + fun build(): Semanticdb.TextDocument = + documentBuilder.build(Semanticdb.Language.KOTLIN, semanticdbURI(), fileText, semanticMd5) fun emitSemanticdbData( firBasedSymbol: FirBasedSymbol<*>?, @@ -57,14 +49,10 @@ class SemanticdbTextDocumentBuilder( context: CheckerContext, enclosingSource: KtSourceElement? = null, ) { - symbolOccurrence(symbol, element, role, enclosingSource).let { - if (!occurrences.contains(it)) { - occurrences.add(it) - } + documentBuilder.addOccurrence(symbolOccurrence(symbol, element, role, enclosingSource)) + if (role == Role.DEFINITION) { + documentBuilder.addSymbol(symbolInformation(firBasedSymbol, symbol, element, context)) } - val symbolInformation = symbolInformation(firBasedSymbol, symbol, element, context) - if (role == Role.DEFINITION && !symbols.contains(symbolInformation)) - symbols.add(symbolInformation) } @OptIn(SymbolInternals::class) diff --git a/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java new file mode 100644 index 00000000..54f0303f --- /dev/null +++ b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java @@ -0,0 +1,71 @@ +package com.sourcegraph.semanticdb; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Accumulator for the {@link Semanticdb.SymbolOccurrence} and {@link Semanticdb.SymbolInformation} + * payloads produced by a single SemanticDB-emitting compiler plugin, and 
assembler for the final + * {@link Semanticdb.TextDocument}. + * + *

The builder enforces the canonical SemanticDB output policy shared by every producer in this + * repository: + * + *

    + *
  • exact-duplicate {@link Semanticdb.SymbolOccurrence}s and {@link + * Semanticdb.SymbolInformation}s are suppressed, + *
  • occurrences are sorted by {@code (startLine, startCharacter)} before being written to the + * assembled {@link Semanticdb.TextDocument}, + *
  • the assembled document is stamped with {@link Semanticdb.Schema#SEMANTICDB4} and the + * caller-supplied {@link Semanticdb.Language}. + *
+ * + *

Compiler-specific concerns (AST traversal, symbol resolution, range computation, text/MD5 + * acquisition, language selection) stay in the plugins; this class is intentionally protobuf-only + * and contains no compiler-API dependencies. + */ +public final class SemanticdbDocumentBuilder { + private static final Comparator OCCURRENCE_ORDER = + Comparator.comparingInt(o -> o.getRange().getStartLine()) + .thenComparingInt(o -> o.getRange().getStartCharacter()); + + private final List occurrences = new ArrayList<>(); + private final List symbols = new ArrayList<>(); + private final Set seenOccurrences = new HashSet<>(); + private final Set seenSymbols = new HashSet<>(); + + /** Adds an occurrence to the document, suppressing exact duplicates. */ + public void addOccurrence(Semanticdb.SymbolOccurrence occurrence) { + if (seenOccurrences.add(occurrence)) { + occurrences.add(occurrence); + } + } + + /** Adds a symbol information entry to the document, suppressing exact duplicates. */ + public void addSymbol(Semanticdb.SymbolInformation symbol) { + if (seenSymbols.add(symbol)) { + symbols.add(symbol); + } + } + + /** + * Assembles a {@link Semanticdb.TextDocument} carrying the accumulated occurrences (sorted by + * range) and symbol informations. 
+ */ + public Semanticdb.TextDocument build( + Semanticdb.Language language, String uri, String text, String md5) { + occurrences.sort(OCCURRENCE_ORDER); + return Semanticdb.TextDocument.newBuilder() + .setSchema(Semanticdb.Schema.SEMANTICDB4) + .setLanguage(language) + .setUri(uri) + .setText(text) + .setMd5(md5) + .addAllOccurrences(occurrences) + .addAllSymbols(symbols) + .build(); + } +} From 8f4832c1f2e4a3b4310ad4817038bce91cfdb9fd Mon Sep 17 00:00:00 2001 From: jupblb Date: Wed, 3 Jun 2026 19:01:34 +0200 Subject: [PATCH 2/3] Unify javac multi-ANALYZE document accumulator Javac fires ANALYZE once per top-level type, so a multi-type source file like Interfaces.java produces several ANALYZE events for the same target SemanticDB file. The old appendSemanticdb logic tried to merge across rounds via HashSet dedup of full protos, but the merged result was never assigned back, so effectively only the first round's data was kept on disk. Worse, even when merging worked, occurrences differed across rounds (later rounds lose enclosing_range positions for already-attributed types) and local symbol IDs (`local 0`, `local 1`, ...) drifted because each round had a fresh LocalSymbolsCache. Replace the on-disk read/merge/write with an in-memory PerSourceState keyed by output path. It bundles the shared SemanticdbDocumentBuilder and the LocalSymbolsCache, both of which now survive across ANALYZE rounds. SemanticdbDocumentBuilder switches to per-key dedup (occurrences by (range, symbol, role), symbols by symbol-name) with first-emission-wins semantics, so the round that originally analyzed a given type wins its richer information without losing any new occurrences/symbols added by later rounds. LombokBuilder.java snapshot regenerated: lombok's repeated synthetic positions are now collapsed (10x duplicate `reference java/` -> 1x), matching the kotlinc plugin's policy. 
--- .../SemanticdbTaskListener.java | 114 ++++++------------ .../semanticdb_javac/SemanticdbVisitor.java | 17 +-- .../semanticdb/SemanticdbDocumentBuilder.java | 74 ++++++++---- .../main/java/minimized/LombokBuilder.java | 34 ------ 4 files changed, 98 insertions(+), 141 deletions(-) diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java index db7ae270..15144d37 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java @@ -1,6 +1,7 @@ package com.sourcegraph.semanticdb_javac; import com.sourcegraph.semanticdb.Semanticdb; +import com.sourcegraph.semanticdb.SemanticdbDocumentBuilder; import com.sourcegraph.semanticdb.SemanticdbPaths; import com.sourcegraph.semanticdb.SemanticdbWriter; @@ -12,15 +13,16 @@ import javax.lang.model.util.Types; import javax.tools.JavaFileObject; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintWriter; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.HashSet; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; /** * Callback hook that generates SemanticDB when the compiler has completed typechecking a Java @@ -33,6 +35,16 @@ public final class SemanticdbTaskListener implements TaskListener { private final Types types; private final Trees trees; private final Elements elements; + // One shared accumulator + local-symbol cache per output path. Javac fires + // ANALYZE once per top-level type, so a multi-type source file produces + // several ANALYZE events that all target the same SemanticDB file. 
We + // accumulate across rounds so the final document keeps the richest + // information from any single round (notably, enclosing_range positions + // are only stable in the round that originally analyzed a given type) and + // the LocalSymbolsCache is shared so `local 0`, `local 1`, ... keep stable + // identities across rounds. See SemanticdbDocumentBuilder for the dedup + // policy. + private final Map perSourceState = new HashMap<>(); private int noRelativePathCounter = 0; public SemanticdbTaskListener( @@ -58,8 +70,10 @@ public void started(TaskEvent e) { inferBazelSourceroot(e.getSourceFile()); Result semanticdbPath = semanticdbOutputPath(options, e); if (semanticdbPath.isOk()) { + Path output = semanticdbPath.getOrThrow(); + perSourceState.remove(output); try { - Files.deleteIfExists(semanticdbPath.getOrThrow()); + Files.deleteIfExists(output); } catch (IOException ex) { this.reportException(ex, e); } @@ -116,12 +130,20 @@ private void onFinishedAnalyze(TaskEvent e) { Result path = semanticdbOutputPath(options, e); if (path != null) { if (path.isOk()) { + Path output = path.getOrThrow(); + PerSourceState state = perSourceState.computeIfAbsent(output, k -> new PerSourceState()); Semanticdb.TextDocument textDocument = - new SemanticdbVisitor(globals, e.getCompilationUnit(), options, types, trees, elements) + new SemanticdbVisitor( + globals, + state.locals, + e.getCompilationUnit(), + options, + types, + trees, + elements, + state.documentBuilder) .buildTextDocument(e.getCompilationUnit()); - Path output = path.getOrThrow(); - if (Files.exists(output)) appendSemanticdb(e, output, textDocument); - else writeSemanticdb(e, output, textDocument); + writeSemanticdb(e, output, textDocument); } else { reporter.error(path.getErrorOrThrow(), e); } @@ -136,76 +158,10 @@ private void writeSemanticdb(TaskEvent event, Path output, Semanticdb.TextDocume } } - private void appendSemanticdb( - TaskEvent event, Path output, Semanticdb.TextDocument textDocument) { - /* - * If 
there already is a semanticdb file at the given path, - * we do the following: - * - Read a documents collection - * - Try to find the document with the matching relative path (matching the incoming textDocument) - * - Then, depending on whether a matching document already exists in the collection: - * - if YES, mutate it in place to only add entries from the incoming document - * - if NO, simply add the incoming text document to the collection - * - Write the collection back to disk - * */ - Semanticdb.TextDocument document = null; - int documentIndex = -1; - Semanticdb.TextDocuments documents = null; - - try (InputStream is = Files.newInputStream(output.toFile().toPath())) { - documents = Semanticdb.TextDocuments.parseFrom(is); - - for (int i = 0; i < documents.getDocumentsCount(); i++) { - Semanticdb.TextDocument candidate = documents.getDocuments(i); - if (document == null && candidate.getUri().equals(textDocument.getUri())) { - document = candidate; - documentIndex = i; - } - } - - } catch (IOException e) { - this.reportException(e, event); - return; - } - - if (document != null) { - // If there is a previous semanticdb document at this path, we need - // to deduplicate symbols and occurrences and mutate the document in place - Set symbols = new HashSet<>(textDocument.getSymbolsList()); - Set occurrences = - new HashSet<>(textDocument.getOccurrencesList()); - Set synthetics = new HashSet<>(textDocument.getSyntheticsList()); - - symbols.addAll(document.getSymbolsList()); - occurrences.addAll(document.getOccurrencesList()); - synthetics.addAll(document.getSyntheticsList()); - - documents - .toBuilder() - .addDocuments( - documentIndex, - document - .toBuilder() - .clearOccurrences() - .addAllOccurrences(occurrences) - .clearSymbols() - .addAllSymbols(symbols) - .clearSynthetics() - .addAllSynthetics(synthetics)); - - } else { - // If no prior document was found, we can just add the incoming one to the collection - documents = 
documents.toBuilder().addDocuments(textDocument).build(); - } - - byte[] bytes = documents.toByteArray(); - - try { - Files.createDirectories(output.getParent()); - Files.write(output, bytes); - } catch (IOException e) { - this.reportException(e, event); - } + /** Per-source-file state that survives across all ANALYZE rounds for that source. */ + private static final class PerSourceState { + final SemanticdbDocumentBuilder documentBuilder = new SemanticdbDocumentBuilder(); + final LocalSymbolsCache locals = new LocalSymbolsCache(); } public static Path absolutePathFromUri(SemanticdbJavacOptions options, JavaFileObject file) { diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java index 4de9289a..68cc4ce1 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java @@ -70,7 +70,7 @@ public class SemanticdbVisitor extends TreePathScanner { private final CompilationUnitTree compUnitTree; private final Elements elements; private final SemanticdbJavacOptions options; - private final SemanticdbDocumentBuilder documentBuilder = new SemanticdbDocumentBuilder(); + private final SemanticdbDocumentBuilder documentBuilder; private String source; private String uri; @@ -78,18 +78,24 @@ public class SemanticdbVisitor extends TreePathScanner { public SemanticdbVisitor( GlobalSymbolsCache globals, + LocalSymbolsCache locals, CompilationUnitTree compUnitTree, SemanticdbJavacOptions options, Types types, Trees trees, - Elements elements) { + Elements elements, + SemanticdbDocumentBuilder documentBuilder) { this.globals = globals; // Reused cache between compilation units. - this.locals = new LocalSymbolsCache(); // Fresh cache per compilation unit. 
+ // Reused across all ANALYZE rounds for the same source so that local + // symbols (`local 0`, `local 1`, ...) keep stable identities even when + // javac fires several ANALYZE events for a multi-type source file. + this.locals = locals; this.options = options; this.types = types; this.elements = elements; this.trees = trees; this.compUnitTree = compUnitTree; + this.documentBuilder = documentBuilder; this.source = semanticdbText(); this.uri = semanticdbUri(compUnitTree, options); this.nodes = new LinkedHashMap<>(); @@ -101,10 +107,7 @@ public Semanticdb.TextDocument buildTextDocument(CompilationUnitTree tree) { resolveNodes(); return documentBuilder.build( - Semanticdb.Language.JAVA, - uri, - options.includeText ? this.source : "", - semanticdbMd5()); + Semanticdb.Language.JAVA, uri, options.includeText ? this.source : "", semanticdbMd5()); } private Optional emitSymbolOccurrence( diff --git a/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java index 54f0303f..3d76cc0c 100644 --- a/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java +++ b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java @@ -2,21 +2,28 @@ import java.util.ArrayList; import java.util.Comparator; -import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; -import java.util.Set; +import java.util.Map; +import java.util.Objects; /** * Accumulator for the {@link Semanticdb.SymbolOccurrence} and {@link Semanticdb.SymbolInformation} - * payloads produced by a single SemanticDB-emitting compiler plugin, and assembler for the final - * {@link Semanticdb.TextDocument}. + * payloads produced by a SemanticDB-emitting compiler plugin, and assembler for the final {@link + * Semanticdb.TextDocument}. * *

The builder enforces the canonical SemanticDB output policy shared by every producer in this * repository: * *

    - *
  • exact-duplicate {@link Semanticdb.SymbolOccurrence}s and {@link - * Semanticdb.SymbolInformation}s are suppressed, + *
  • occurrences are deduplicated by {@code (range, symbol, role)} — additional fields like + * {@code enclosing_range} are not part of identity, so a later emission that happens to lose + * the enclosing range does not introduce a "duplicate" occurrence, + *
  • {@link Semanticdb.SymbolInformation}s are deduplicated by {@code symbol}, + *
  • in both cases the first emission wins, which lets multi-pass producers (e.g. the javac + * plugin, which scans the same compilation unit on every {@code ANALYZE} round and where only + * the first round has complete position information for already-attributed types) accumulate + * across rounds without losing data, *
  • occurrences are sorted by {@code (startLine, startCharacter)} before being written to the * assembled {@link Semanticdb.TextDocument}, *
  • the assembled document is stamped with {@link Semanticdb.Schema#SEMANTICDB4} and the @@ -32,23 +39,20 @@ public final class SemanticdbDocumentBuilder { Comparator.comparingInt(o -> o.getRange().getStartLine()) .thenComparingInt(o -> o.getRange().getStartCharacter()); - private final List occurrences = new ArrayList<>(); - private final List symbols = new ArrayList<>(); - private final Set seenOccurrences = new HashSet<>(); - private final Set seenSymbols = new HashSet<>(); + private final Map occurrences = new LinkedHashMap<>(); + private final Map symbols = new LinkedHashMap<>(); - /** Adds an occurrence to the document, suppressing exact duplicates. */ + /** + * Adds an occurrence, keeping the first emission for any given {@code (range, symbol, role)} + * triple. + */ public void addOccurrence(Semanticdb.SymbolOccurrence occurrence) { - if (seenOccurrences.add(occurrence)) { - occurrences.add(occurrence); - } + occurrences.putIfAbsent(new OccurrenceKey(occurrence), occurrence); } - /** Adds a symbol information entry to the document, suppressing exact duplicates. */ + /** Adds a symbol information entry, keeping the first emission per {@code symbol}. 
*/ public void addSymbol(Semanticdb.SymbolInformation symbol) { - if (seenSymbols.add(symbol)) { - symbols.add(symbol); - } + symbols.putIfAbsent(symbol.getSymbol(), symbol); } /** @@ -57,15 +61,43 @@ public void addSymbol(Semanticdb.SymbolInformation symbol) { */ public Semanticdb.TextDocument build( Semanticdb.Language language, String uri, String text, String md5) { - occurrences.sort(OCCURRENCE_ORDER); + List sortedOccurrences = new ArrayList<>(occurrences.values()); + sortedOccurrences.sort(OCCURRENCE_ORDER); return Semanticdb.TextDocument.newBuilder() .setSchema(Semanticdb.Schema.SEMANTICDB4) .setLanguage(language) .setUri(uri) .setText(text) .setMd5(md5) - .addAllOccurrences(occurrences) - .addAllSymbols(symbols) + .addAllOccurrences(sortedOccurrences) + .addAllSymbols(symbols.values()) .build(); } + + private static final class OccurrenceKey { + private final Semanticdb.Range range; + private final String symbol; + private final Semanticdb.SymbolOccurrence.Role role; + + OccurrenceKey(Semanticdb.SymbolOccurrence occurrence) { + this.range = occurrence.hasRange() ? 
occurrence.getRange() : null; + this.symbol = occurrence.getSymbol(); + this.role = occurrence.getRole(); + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (!(other instanceof OccurrenceKey)) return false; + OccurrenceKey that = (OccurrenceKey) other; + return role == that.role + && Objects.equals(range, that.range) + && Objects.equals(symbol, that.symbol); + } + + @Override + public int hashCode() { + return Objects.hash(range, symbol, role); + } + } } diff --git a/tests/snapshots/src/main/generated/tests/minimized/src/main/java/minimized/LombokBuilder.java b/tests/snapshots/src/main/generated/tests/minimized/src/main/java/minimized/LombokBuilder.java index 20e09be4..8bb0cff5 100644 --- a/tests/snapshots/src/main/generated/tests/minimized/src/main/java/minimized/LombokBuilder.java +++ b/tests/snapshots/src/main/generated/tests/minimized/src/main/java/minimized/LombokBuilder.java @@ -15,48 +15,14 @@ //^^^^^^^^^^^^^^^ reference local 0 //^^^^^^^^^^^^^^^ reference local 1 //^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . 
java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ //^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . java/lang/ -//^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello# -//^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello# //^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello# //^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#HelloBuilder# -//^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#HelloBuilder# -//^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#HelloBuilder#message. -//^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#HelloBuilder#message. //^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#HelloBuilder#message. //^^^^^^^^^^^^^^^ reference semanticdb maven . . minimized/Hello#message. //^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/Override# //^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/String# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/String# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/String# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/String# //^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings# -//^^^^^^^^^^^^^^^ reference semanticdb maven 
jdk 11 java/lang/SuppressWarnings#value(). //^^^^^^^^^^^^^^^ reference semanticdb maven jdk 11 java/lang/SuppressWarnings#value(). // ^^^^^ reference semanticdb maven . . lombok/ // ^^^^^^^ reference semanticdb maven . . lombok/Builder# From a057b3f60c7aa30a921a0eddfe29fe9415c12f46 Mon Sep 17 00:00:00 2001 From: jupblb Date: Wed, 3 Jun 2026 19:27:54 +0200 Subject: [PATCH 3/3] Trim comments on document accumulator changes --- .../SemanticdbTaskListener.java | 11 +----- .../semanticdb_javac/SemanticdbVisitor.java | 5 +-- .../semanticdb/SemanticdbDocumentBuilder.java | 38 ++----------------- 3 files changed, 6 insertions(+), 48 deletions(-) diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java index 15144d37..a1625376 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbTaskListener.java @@ -35,15 +35,7 @@ public final class SemanticdbTaskListener implements TaskListener { private final Types types; private final Trees trees; private final Elements elements; - // One shared accumulator + local-symbol cache per output path. Javac fires - // ANALYZE once per top-level type, so a multi-type source file produces - // several ANALYZE events that all target the same SemanticDB file. We - // accumulate across rounds so the final document keeps the richest - // information from any single round (notably, enclosing_range positions - // are only stable in the round that originally analyzed a given type) and - // the LocalSymbolsCache is shared so `local 0`, `local 1`, ... keep stable - // identities across rounds. See SemanticdbDocumentBuilder for the dedup - // policy. + // Javac fires ANALYZE once per top-level type; accumulate across rounds per output path. 
private final Map perSourceState = new HashMap<>(); private int noRelativePathCounter = 0; @@ -158,7 +150,6 @@ private void writeSemanticdb(TaskEvent event, Path output, Semanticdb.TextDocume } } - /** Per-source-file state that survives across all ANALYZE rounds for that source. */ private static final class PerSourceState { final SemanticdbDocumentBuilder documentBuilder = new SemanticdbDocumentBuilder(); final LocalSymbolsCache locals = new LocalSymbolsCache(); diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java index 68cc4ce1..f06c8dde 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java @@ -85,10 +85,7 @@ public SemanticdbVisitor( Trees trees, Elements elements, SemanticdbDocumentBuilder documentBuilder) { - this.globals = globals; // Reused cache between compilation units. - // Reused across all ANALYZE rounds for the same source so that local - // symbols (`local 0`, `local 1`, ...) keep stable identities even when - // javac fires several ANALYZE events for a multi-type source file. 
+ this.globals = globals; this.locals = locals; this.options = options; this.types = types; diff --git a/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java index 3d76cc0c..e9d5c6be 100644 --- a/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java +++ b/semanticdb-shared/src/main/java/com/sourcegraph/semanticdb/SemanticdbDocumentBuilder.java @@ -8,31 +8,10 @@ import java.util.Objects; /** - * Accumulator for the {@link Semanticdb.SymbolOccurrence} and {@link Semanticdb.SymbolInformation} - * payloads produced by a SemanticDB-emitting compiler plugin, and assembler for the final {@link - * Semanticdb.TextDocument}. - * - *

    The builder enforces the canonical SemanticDB output policy shared by every producer in this - * repository: - * - *

      - *
    • occurrences are deduplicated by {@code (range, symbol, role)} — additional fields like - * {@code enclosing_range} are not part of identity, so a later emission that happens to lose - * the enclosing range does not introduce a "duplicate" occurrence, - *
    • {@link Semanticdb.SymbolInformation}s are deduplicated by {@code symbol}, - *
    • in both cases the first emission wins, which lets multi-pass producers (e.g. the javac - * plugin, which scans the same compilation unit on every {@code ANALYZE} round and where only - * the first round has complete position information for already-attributed types) accumulate - * across rounds without losing data, - *
    • occurrences are sorted by {@code (startLine, startCharacter)} before being written to the - * assembled {@link Semanticdb.TextDocument}, - *
    • the assembled document is stamped with {@link Semanticdb.Schema#SEMANTICDB4} and the - * caller-supplied {@link Semanticdb.Language}. - *
    - * - *

    Compiler-specific concerns (AST traversal, symbol resolution, range computation, text/MD5 - * acquisition, language selection) stay in the plugins; this class is intentionally protobuf-only - * and contains no compiler-API dependencies. + * Accumulator for {@link Semanticdb.SymbolOccurrence}/{@link Semanticdb.SymbolInformation} that + * assembles a final {@link Semanticdb.TextDocument}. First emission wins: occurrences are + * deduplicated by {@code (range, symbol, role)}, symbols by {@code symbol}; occurrences are sorted + * by start position. Contains no compiler-API dependencies. */ public final class SemanticdbDocumentBuilder { private static final Comparator OCCURRENCE_ORDER = @@ -42,23 +21,14 @@ public final class SemanticdbDocumentBuilder { private final Map occurrences = new LinkedHashMap<>(); private final Map symbols = new LinkedHashMap<>(); - /** - * Adds an occurrence, keeping the first emission for any given {@code (range, symbol, role)} - * triple. - */ public void addOccurrence(Semanticdb.SymbolOccurrence occurrence) { occurrences.putIfAbsent(new OccurrenceKey(occurrence), occurrence); } - /** Adds a symbol information entry, keeping the first emission per {@code symbol}. */ public void addSymbol(Semanticdb.SymbolInformation symbol) { symbols.putIfAbsent(symbol.getSymbol(), symbol); } - /** - * Assembles a {@link Semanticdb.TextDocument} carrying the accumulated occurrences (sorted by - * range) and symbol informations. - */ public Semanticdb.TextDocument build( Semanticdb.Language language, String uri, String text, String md5) { List sortedOccurrences = new ArrayList<>(occurrences.values());