Skip to content

Commit 8c231a2

Browse files
authored
perf: adjust javadoc return content (#932)
* chore: update * chore: update * fix: fix comments * fix: update the code * fix: update * ci: update * perf: remove html tags * perf: remove useless code * perf: remove useless code
1 parent e7bf941 commit 8c231a2

File tree

1 file changed

+174
-70
lines changed

1 file changed

+174
-70
lines changed

jdtls.ext/com.microsoft.jdtls.ext.core/src/com/microsoft/jdtls/ext/core/parser/ContextResolver.java

Lines changed: 174 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,14 @@ private static boolean isCommonJdkType(String typeName) {
231231
return false;
232232
}
233233

234-
// Extract package name from fully qualified type name
235234
int lastDotIndex = typeName.lastIndexOf('.');
236235
if (lastDotIndex == -1) {
237-
return false; // No package (default package)
236+
return false;
238237
}
239238

240239
String packageName = typeName.substring(0, lastDotIndex);
241240

242-
// Check if package matches any common JDK package
243-
// This includes both exact matches and sub-packages
241+
// Check exact match or sub-package match
244242
return SKIP_COMMON_JDK_PACKAGES.contains(packageName) ||
245243
SKIP_COMMON_JDK_PACKAGES.stream().anyMatch(pkg -> packageName.startsWith(pkg + "."));
246244
}
@@ -862,13 +860,15 @@ public static String generateClassDescription(org.eclipse.jdt.core.IType type, S
862860
// ================ JavaDoc Extraction Methods ================
863861

864862
/**
865-
* Extracts relevant code snippets from Javadoc.
866-
* This method is optimized to extract code from `<code>` tags and markdown code fences,
867-
* and formats them in an LLM-readable format.
863+
* Extracts relevant JavaDoc content including description text and code snippets.
864+
* This method extracts:
865+
* 1. Class description (first paragraph of text)
866+
* 2. Code snippets from {@code <code>}, {@code <pre>}, and {@code ```} blocks
867+
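* 3. @deprecated tag if present (NOTE(review): no @deprecated handling is visible in this method's body - confirm it exists or drop this item)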
868868
*
869869
* @param type the type to extract Javadoc from.
870870
* @param monitor the progress monitor.
871-
* @return A string containing all found code snippets, formatted as markdown code blocks.
871+
* @return A string containing description and code snippets in LLM-readable format.
872872
*/
873873
private static String extractRelevantJavaDocContent(org.eclipse.jdt.core.IType type, IProgressMonitor monitor) {
874874
try {
@@ -883,7 +883,6 @@ private static String extractRelevantJavaDocContent(org.eclipse.jdt.core.IType t
883883
}
884884

885885
String rawJavadoc;
886-
boolean isHtml = false;
887886

888887
// Extract JavaDoc from source code (fast - no I/O, no network, no HTML parsing)
889888
org.eclipse.jdt.core.ISourceRange javadocRange = type.getJavadocRange();
@@ -896,49 +895,89 @@ private static String extractRelevantJavaDocContent(org.eclipse.jdt.core.IType t
896895
return "";
897896
}
898897

899-
StringBuilder allCodeSnippets = new StringBuilder();
898+
StringBuilder result = new StringBuilder();
900899
Set<String> seenCodeSnippets = new HashSet<>();
900+
901+
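// Clean Javadoc comment for processing.
// NOTE(review): removeHtmlTags strips every <...> tag, so the <pre>/<code> matchers that later run on
// cleanedJavadoc will likely never match - confirm, and consider matching before tags are removed.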
902+
String cleanedJavadoc = cleanJavadocComment(rawJavadoc);
903+
cleanedJavadoc = removeHtmlTags(cleanedJavadoc);
904+
cleanedJavadoc = convertHtmlEntities(cleanedJavadoc);
905+
906+
// === High Priority: Extract class description text (first paragraph) ===
907+
String description = extractClassDescription(cleanedJavadoc);
908+
if (isNotEmpty(description)) {
909+
result.append("Description:\n").append(description).append("\n\n");
910+
}
901911

912+
// === Extract code snippets ===
902913
// 1. Extract markdown code blocks (```...```)
903914
Matcher markdownMatcher = MARKDOWN_CODE_PATTERN.matcher(rawJavadoc);
904915
while (markdownMatcher.find()) {
905916
String code = markdownMatcher.group(1).trim();
906917
if (isNotEmpty(code) && seenCodeSnippets.add(code)) {
907-
allCodeSnippets.append("```java\n").append(code).append("\n```\n\n");
918+
result.append("```java\n").append(code).append("\n```\n\n");
908919
}
909920
}
910921

911922
// 2. Extract HTML <pre> and <code> blocks
912-
// Clean Javadoc comment for HTML extraction
913-
String cleanedForHtml = isHtml ? rawJavadoc : cleanJavadocComment(rawJavadoc);
914-
cleanedForHtml = convertHtmlEntities(cleanedForHtml);
915-
916923
// Priority 1: <pre> blocks (often contain well-formatted code)
917-
Matcher preMatcher = HTML_PRE_PATTERN.matcher(cleanedForHtml);
924+
Matcher preMatcher = HTML_PRE_PATTERN.matcher(cleanedJavadoc);
918925
while (preMatcher.find()) {
919926
String code = preMatcher.group(1).replaceAll("(?i)<code[^>]*>", "").replaceAll("(?i)</code>", "").trim();
920927
if (isNotEmpty(code) && seenCodeSnippets.add(code)) {
921-
allCodeSnippets.append("```java\n").append(code).append("\n```\n\n");
928+
result.append("```java\n").append(code).append("\n```\n\n");
922929
}
923930
}
924931

925932
// Priority 2: <code> blocks (for inline snippets)
926-
Matcher codeMatcher = HTML_CODE_PATTERN.matcher(cleanedForHtml);
933+
Matcher codeMatcher = HTML_CODE_PATTERN.matcher(cleanedJavadoc);
927934
while (codeMatcher.find()) {
928935
String code = codeMatcher.group(1).trim();
929936
// Use HashSet for O(1) duplicate checking
930937
if (isNotEmpty(code) && seenCodeSnippets.add(code)) {
931-
allCodeSnippets.append("```java\n").append(code).append("\n```\n\n");
938+
result.append("```java\n").append(code).append("\n```\n\n");
932939
}
933940
}
934941

935-
return allCodeSnippets.toString().trim();
942+
return result.toString().trim();
936943

937944
} catch (Exception e) {
938945
JdtlsExtActivator.logException("Error extracting relevant JavaDoc content for: " + type.getElementName(), e);
939946
return "";
940947
}
941948
}
949+
950+
/**
951+
* Extract the main description paragraph from class JavaDoc (before @tags and code blocks).
952+
* Returns the descriptive text, truncated to roughly 2000 characters at a sentence boundary, or at a word boundary with a trailing "..." when no sentence boundary is found.
953+
*/
954+
private static String extractClassDescription(String cleanedJavadoc) {
955+
if (cleanedJavadoc == null || cleanedJavadoc.isEmpty()) {
956+
return "";
957+
}
958+
959+
// Remove code blocks first to get pure text
960+
String textOnly = cleanedJavadoc;
961+
textOnly = MARKDOWN_CODE_PATTERN.matcher(textOnly).replaceAll("");
962+
textOnly = HTML_PRE_PATTERN.matcher(textOnly).replaceAll("");
963+
textOnly = HTML_CODE_PATTERN.matcher(textOnly).replaceAll("");
964+
965+
// Extract description before @tags
966+
String description = extractJavadocDescription(textOnly);
967+
968+
// Limit to ~2000 characters
969+
if (description.length() > 2000) {
970+
int breakPoint = findBestBreakpoint(description, 1500, 2100);
971+
if (breakPoint != -1) {
972+
description = description.substring(0, breakPoint + 1).trim();
973+
} else {
974+
int lastSpace = description.lastIndexOf(' ', 2000);
975+
description = description.substring(0, lastSpace > 1500 ? lastSpace : 2000).trim() + "...";
976+
}
977+
}
978+
979+
return description.trim();
980+
}
942981

943982
/**
944983
* Clean up raw JavaDoc comment by removing comment markers and asterisks
@@ -987,27 +1026,68 @@ private static String convertHtmlEntities(String text) {
9871026
if (text == null || text.isEmpty()) {
9881027
return text;
9891028
}
990-
String result = text;
991-
result = result.replace("&nbsp;", " ");
992-
result = result.replace("&lt;", "<");
993-
result = result.replace("&gt;", ">");
994-
result = result.replace("&amp;", "&");
995-
result = result.replace("&quot;", "\"");
996-
result = result.replace("&#39;", "'");
997-
result = result.replace("&apos;", "'");
998-
result = result.replace("&mdash;", "-");
999-
result = result.replace("&ndash;", "-");
1000-
return result;
1029+
return text.replace("&nbsp;", " ")
1030+
.replace("&lt;", "<")
1031+
.replace("&gt;", ">")
1032+
.replace("&amp;", "&")
1033+
.replace("&quot;", "\"")
1034+
.replace("&#39;", "'")
1035+
.replace("&apos;", "'")
1036+
.replace("&mdash;", "-")
1037+
.replace("&ndash;", "-");
10011038
}
10021039

10031040
/**
1004-
* Extract summary description from method JavaDoc
1005-
* Returns the first sentence or paragraph of the JavaDoc as a brief description
1041+
* Remove all HTML tags from text, keeping only plain text content.
1042+
* Converts {@code <br>}, opening {@code <p>}, and closing {@code </p>}, {@code </div>}, {@code </li>} tags to line breaks before stripping the remaining tags.
10061043
*/
1007-
private static String extractMethodJavaDocSummary(IMethod method) {
1008-
return extractJavaDocSummaryFromElement(method);
1044+
private static String removeHtmlTags(String text) {
1045+
if (text == null || text.isEmpty()) {
1046+
return text;
1047+
}
1048+
1049+
// Replace block-level tags with line breaks
1050+
text = text.replaceAll("(?i)</(p|div|li)>|<br\\s*/?>|<p[^>]*>", "\n");
1051+
1052+
// Remove all remaining HTML tags
1053+
text = text.replaceAll("<[^>]+>", "");
1054+
1055+
// Clean up whitespace: collapse spaces, trim lines, limit line breaks
1056+
text = text.replaceAll("[ \\t]+", " ")
1057+
.replaceAll(" *\\n *", "\n")
1058+
.replaceAll("\\n{3,}", "\n\n");
1059+
1060+
return text.trim();
10091061
}
10101062

1063+
/**
1064+
* Extract method JavaDoc content directly for LLM consumption.
1065+
* Returns cleaned JavaDoc without artificial truncation - let LLM understand the full context.
1066+
*/
1067+
private static String extractMethodJavaDocSummary(IMethod method) {
1068+
try {
1069+
org.eclipse.jdt.core.ISourceRange javadocRange = method.getJavadocRange();
1070+
if (javadocRange == null) {
1071+
return "";
1072+
}
1073+
1074+
String rawJavadoc = method.getCompilationUnit().getSource()
1075+
.substring(javadocRange.getOffset(), javadocRange.getOffset() + javadocRange.getLength());
1076+
1077+
if (!isNotEmpty(rawJavadoc)) {
1078+
return "";
1079+
}
1080+
1081+
// Just clean and return - let LLM understand the full context
1082+
String cleaned = cleanJavadocComment(rawJavadoc);
1083+
cleaned = removeHtmlTags(cleaned);
1084+
return convertHtmlEntities(cleaned);
1085+
1086+
} catch (Exception e) {
1087+
return "";
1088+
}
1089+
}
1090+
10111091
/**
10121092
* Extract the main description part from JavaDoc (before @tags)
10131093
*/
@@ -1050,13 +1130,7 @@ private static String getFirstSentenceOrLimit(String text, int maxLength) {
10501130
}
10511131

10521132
// Find first sentence boundary (., !, ?)
1053-
int[] boundaries = {text.indexOf(". "), text.indexOf(".\n"), text.indexOf("! "), text.indexOf("? ")};
1054-
int firstSentenceEnd = -1;
1055-
for (int boundary : boundaries) {
1056-
if (boundary != -1 && (firstSentenceEnd == -1 || boundary < firstSentenceEnd)) {
1057-
firstSentenceEnd = boundary;
1058-
}
1059-
}
1133+
int firstSentenceEnd = findFirstSentenceBoundary(text);
10601134

10611135
// Return first sentence if within reasonable length
10621136
if (firstSentenceEnd != -1 && firstSentenceEnd < maxLength) {
@@ -1072,12 +1146,67 @@ private static String getFirstSentenceOrLimit(String text, int maxLength) {
10721146

10731147
return text.trim();
10741148
}
1149+
1150+
/**
1151+
* Find the first sentence boundary in text
1152+
*/
1153+
private static int findFirstSentenceBoundary(String text) {
1154+
int[] boundaries = {text.indexOf(". "), text.indexOf(".\n"), text.indexOf("! "), text.indexOf("? ")};
1155+
int result = -1;
1156+
for (int boundary : boundaries) {
1157+
if (boundary != -1 && (result == -1 || boundary < result)) {
1158+
result = boundary;
1159+
}
1160+
}
1161+
return result;
1162+
}
1163+
1164+
/**
1165+
* Find the best breakpoint for truncating text within a range
1166+
*/
1167+
private static int findBestBreakpoint(String text, int minPos, int maxPos) {
1168+
int[] boundaries = {
1169+
text.indexOf(". ", minPos),
1170+
text.indexOf(".\n", minPos),
1171+
text.indexOf("! ", minPos),
1172+
text.indexOf("? ", minPos)
1173+
};
1174+
1175+
int result = -1;
1176+
for (int boundary : boundaries) {
1177+
if (boundary != -1 && boundary < maxPos && (result == -1 || boundary < result)) {
1178+
result = boundary;
1179+
}
1180+
}
1181+
return result;
1182+
}
10751183

10761184
/**
1077-
* Extract summary description from field JavaDoc
1185+
* Extract field JavaDoc content directly for LLM consumption.
1186+
* Returns cleaned JavaDoc without artificial truncation - let LLM understand the full context.
10781187
*/
10791188
private static String extractFieldJavaDocSummary(org.eclipse.jdt.core.IField field) {
1080-
return extractJavaDocSummaryFromElement(field);
1189+
try {
1190+
org.eclipse.jdt.core.ISourceRange javadocRange = field.getJavadocRange();
1191+
if (javadocRange == null) {
1192+
return "";
1193+
}
1194+
1195+
String rawJavadoc = field.getCompilationUnit().getSource()
1196+
.substring(javadocRange.getOffset(), javadocRange.getOffset() + javadocRange.getLength());
1197+
1198+
if (!isNotEmpty(rawJavadoc)) {
1199+
return "";
1200+
}
1201+
1202+
// Just clean and return - let LLM understand the full context
1203+
String cleaned = cleanJavadocComment(rawJavadoc);
1204+
cleaned = removeHtmlTags(cleaned);
1205+
return convertHtmlEntities(cleaned);
1206+
1207+
} catch (Exception e) {
1208+
return "";
1209+
}
10811210
}
10821211

10831212
/**
@@ -1094,8 +1223,6 @@ public static String generateFieldSignature(org.eclipse.jdt.core.IField field) {
10941223
return generateFieldSignatureInternal(field, false);
10951224
}
10961225

1097-
1098-
10991226
/**
11001227
* Convert JDT type signature to human-readable format
11011228
*/
@@ -1228,30 +1355,7 @@ private static String simplifyTypeName(String qualifiedName) {
12281355
return lastDot == -1 ? qualifiedName : qualifiedName.substring(lastDot + 1);
12291356
}
12301357

1231-
/**
1232-
* Unified JavaDoc summary extractor for methods and fields
1233-
*/
1234-
private static String extractJavaDocSummaryFromElement(org.eclipse.jdt.core.IMember element) {
1235-
try {
1236-
org.eclipse.jdt.core.ISourceRange javadocRange = element.getJavadocRange();
1237-
if (javadocRange == null) {
1238-
return "";
1239-
}
1240-
1241-
String rawJavadoc = element.getCompilationUnit().getSource()
1242-
.substring(javadocRange.getOffset(), javadocRange.getOffset() + javadocRange.getLength());
1243-
1244-
if (rawJavadoc == null || rawJavadoc.isEmpty()) {
1245-
return "";
1246-
}
1247-
1248-
String cleaned = cleanJavadocComment(rawJavadoc);
1249-
String description = extractJavadocDescription(cleaned);
1250-
return getFirstSentenceOrLimit(description, 120);
1251-
} catch (Exception e) {
1252-
return "";
1253-
}
1254-
}
1358+
12551359

12561360
/**
12571361
* Unified method signature generator (handles both source and binary types)

0 commit comments

Comments
 (0)