@@ -231,16 +231,14 @@ private static boolean isCommonJdkType(String typeName) {
231231 return false ;
232232 }
233233
234- // Extract package name from fully qualified type name
235234 int lastDotIndex = typeName .lastIndexOf ('.' );
236235 if (lastDotIndex == -1 ) {
237- return false ; // No package (default package)
236+ return false ;
238237 }
239238
240239 String packageName = typeName .substring (0 , lastDotIndex );
241240
242- // Check if package matches any common JDK package
243- // This includes both exact matches and sub-packages
241+ // Check exact match or sub-package match
244242 return SKIP_COMMON_JDK_PACKAGES .contains (packageName ) ||
245243 SKIP_COMMON_JDK_PACKAGES .stream ().anyMatch (pkg -> packageName .startsWith (pkg + "." ));
246244 }
@@ -862,13 +860,15 @@ public static String generateClassDescription(org.eclipse.jdt.core.IType type, S
862860 // ================ JavaDoc Extraction Methods ================
863861
864862 /**
865- * Extracts relevant code snippets from Javadoc.
866- * This method is optimized to extract code from `<code>` tags and markdown code fences,
867- * and formats them in an LLM-readable format.
863+ * Extracts relevant JavaDoc content including description text and code snippets.
864+ * This method extracts:
865+ * 1. Class description (first paragraph of text)
866+ * 2. Code snippets from <code>, <pre>, and ``` blocks
867+ * 3. @deprecated tag if present
868868 *
869869 * @param type the type to extract Javadoc from.
870870 * @param monitor the progress monitor.
871- * @return A string containing all found code snippets, formatted as markdown code blocks .
871+ * @return A string containing description and code snippets in LLM-readable format .
872872 */
873873 private static String extractRelevantJavaDocContent (org .eclipse .jdt .core .IType type , IProgressMonitor monitor ) {
874874 try {
@@ -883,7 +883,6 @@ private static String extractRelevantJavaDocContent(org.eclipse.jdt.core.IType t
883883 }
884884
885885 String rawJavadoc ;
886- boolean isHtml = false ;
887886
888887 // Extract JavaDoc from source code (fast - no I/O, no network, no HTML parsing)
889888 org .eclipse .jdt .core .ISourceRange javadocRange = type .getJavadocRange ();
@@ -896,49 +895,89 @@ private static String extractRelevantJavaDocContent(org.eclipse.jdt.core.IType t
896895 return "" ;
897896 }
898897
899- StringBuilder allCodeSnippets = new StringBuilder ();
898+ StringBuilder result = new StringBuilder ();
900899 Set <String > seenCodeSnippets = new HashSet <>();
900+
901+ // Clean Javadoc comment for processing
902+ String cleanedJavadoc = cleanJavadocComment (rawJavadoc );
903+ cleanedJavadoc = removeHtmlTags (cleanedJavadoc );
904+ cleanedJavadoc = convertHtmlEntities (cleanedJavadoc );
905+
906+ // === High Priority: Extract class description text (first paragraph) ===
907+ String description = extractClassDescription (cleanedJavadoc );
908+ if (isNotEmpty (description )) {
909+ result .append ("Description:\n " ).append (description ).append ("\n \n " );
910+ }
901911
912+ // === Extract code snippets ===
902913 // 1. Extract markdown code blocks (```...```)
903914 Matcher markdownMatcher = MARKDOWN_CODE_PATTERN .matcher (rawJavadoc );
904915 while (markdownMatcher .find ()) {
905916 String code = markdownMatcher .group (1 ).trim ();
906917 if (isNotEmpty (code ) && seenCodeSnippets .add (code )) {
907- allCodeSnippets .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
918+ result .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
908919 }
909920 }
910921
911922 // 2. Extract HTML <pre> and <code> blocks
912- // Clean Javadoc comment for HTML extraction
913- String cleanedForHtml = isHtml ? rawJavadoc : cleanJavadocComment (rawJavadoc );
914- cleanedForHtml = convertHtmlEntities (cleanedForHtml );
915-
916923 // Priority 1: <pre> blocks (often contain well-formatted code)
917- Matcher preMatcher = HTML_PRE_PATTERN .matcher (cleanedForHtml );
924+ Matcher preMatcher = HTML_PRE_PATTERN .matcher (cleanedJavadoc );
918925 while (preMatcher .find ()) {
919926 String code = preMatcher .group (1 ).replaceAll ("(?i)<code[^>]*>" , "" ).replaceAll ("(?i)</code>" , "" ).trim ();
920927 if (isNotEmpty (code ) && seenCodeSnippets .add (code )) {
921- allCodeSnippets .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
928+ result .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
922929 }
923930 }
924931
925932 // Priority 2: <code> blocks (for inline snippets)
926- Matcher codeMatcher = HTML_CODE_PATTERN .matcher (cleanedForHtml );
933+ Matcher codeMatcher = HTML_CODE_PATTERN .matcher (cleanedJavadoc );
927934 while (codeMatcher .find ()) {
928935 String code = codeMatcher .group (1 ).trim ();
929936 // Use HashSet for O(1) duplicate checking
930937 if (isNotEmpty (code ) && seenCodeSnippets .add (code )) {
931- allCodeSnippets .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
938+ result .append ("```java\n " ).append (code ).append ("\n ```\n \n " );
932939 }
933940 }
934941
935- return allCodeSnippets .toString ().trim ();
942+ return result .toString ().trim ();
936943
937944 } catch (Exception e ) {
938945 JdtlsExtActivator .logException ("Error extracting relevant JavaDoc content for: " + type .getElementName (), e );
939946 return "" ;
940947 }
941948 }
949+
950+ /**
951+ * Extract the main description paragraph from class JavaDoc (before @tags and code blocks).
952+ * Returns the first paragraph of descriptive text, limited to reasonable length.
953+ */
954+ private static String extractClassDescription (String cleanedJavadoc ) {
955+ if (cleanedJavadoc == null || cleanedJavadoc .isEmpty ()) {
956+ return "" ;
957+ }
958+
959+ // Remove code blocks first to get pure text
960+ String textOnly = cleanedJavadoc ;
961+ textOnly = MARKDOWN_CODE_PATTERN .matcher (textOnly ).replaceAll ("" );
962+ textOnly = HTML_PRE_PATTERN .matcher (textOnly ).replaceAll ("" );
963+ textOnly = HTML_CODE_PATTERN .matcher (textOnly ).replaceAll ("" );
964+
965+ // Extract description before @tags
966+ String description = extractJavadocDescription (textOnly );
967+
968+ // Limit to ~2000 characters
969+ if (description .length () > 2000 ) {
970+ int breakPoint = findBestBreakpoint (description , 1500 , 2100 );
971+ if (breakPoint != -1 ) {
972+ description = description .substring (0 , breakPoint + 1 ).trim ();
973+ } else {
974+ int lastSpace = description .lastIndexOf (' ' , 2000 );
975+ description = description .substring (0 , lastSpace > 1500 ? lastSpace : 2000 ).trim () + "..." ;
976+ }
977+ }
978+
979+ return description .trim ();
980+ }
942981
943982 /**
944983 * Clean up raw JavaDoc comment by removing comment markers and asterisks
@@ -987,27 +1026,68 @@ private static String convertHtmlEntities(String text) {
9871026 if (text == null || text .isEmpty ()) {
9881027 return text ;
9891028 }
990- String result = text ;
991- result = result .replace (" " , " " );
992- result = result .replace ("<" , "<" );
993- result = result .replace (">" , ">" );
994- result = result .replace ("&" , "&" );
995- result = result .replace (""" , "\" " );
996- result = result .replace ("'" , "'" );
997- result = result .replace ("'" , "'" );
998- result = result .replace ("—" , "-" );
999- result = result .replace ("–" , "-" );
1000- return result ;
1029+ return text .replace (" " , " " )
1030+ .replace ("<" , "<" )
1031+ .replace (">" , ">" )
1032+ .replace ("&" , "&" )
1033+ .replace (""" , "\" " )
1034+ .replace ("'" , "'" )
1035+ .replace ("'" , "'" )
1036+ .replace ("—" , "-" )
1037+ .replace ("–" , "-" );
10011038 }
10021039
10031040 /**
1004- * Extract summary description from method JavaDoc
1005- * Returns the first sentence or paragraph of the JavaDoc as a brief description
1041+ * Remove all HTML tags from text, keeping only plain text content.
1042+ * Preserves line breaks for block-level tags like <p>, <br>, <div>.
10061043 */
1007- private static String extractMethodJavaDocSummary (IMethod method ) {
1008- return extractJavaDocSummaryFromElement (method );
1044+ private static String removeHtmlTags (String text ) {
1045+ if (text == null || text .isEmpty ()) {
1046+ return text ;
1047+ }
1048+
1049+ // Replace block-level tags with line breaks
1050+ text = text .replaceAll ("(?i)</(p|div|li)>|<br\\ s*/?>|<p[^>]*>" , "\n " );
1051+
1052+ // Remove all remaining HTML tags
1053+ text = text .replaceAll ("<[^>]+>" , "" );
1054+
1055+ // Clean up whitespace: collapse spaces, trim lines, limit line breaks
1056+ text = text .replaceAll ("[ \\ t]+" , " " )
1057+ .replaceAll (" *\\ n *" , "\n " )
1058+ .replaceAll ("\\ n{3,}" , "\n \n " );
1059+
1060+ return text .trim ();
10091061 }
10101062
1063+ /**
1064+ * Extract method JavaDoc content directly for LLM consumption.
1065+ * Returns cleaned JavaDoc without artificial truncation - let LLM understand the full context.
1066+ */
1067+ private static String extractMethodJavaDocSummary (IMethod method ) {
1068+ try {
1069+ org .eclipse .jdt .core .ISourceRange javadocRange = method .getJavadocRange ();
1070+ if (javadocRange == null ) {
1071+ return "" ;
1072+ }
1073+
1074+ String rawJavadoc = method .getCompilationUnit ().getSource ()
1075+ .substring (javadocRange .getOffset (), javadocRange .getOffset () + javadocRange .getLength ());
1076+
1077+ if (!isNotEmpty (rawJavadoc )) {
1078+ return "" ;
1079+ }
1080+
1081+ // Just clean and return - let LLM understand the full context
1082+ String cleaned = cleanJavadocComment (rawJavadoc );
1083+ cleaned = removeHtmlTags (cleaned );
1084+ return convertHtmlEntities (cleaned );
1085+
1086+ } catch (Exception e ) {
1087+ return "" ;
1088+ }
1089+ }
1090+
10111091 /**
10121092 * Extract the main description part from JavaDoc (before @tags)
10131093 */
@@ -1050,13 +1130,7 @@ private static String getFirstSentenceOrLimit(String text, int maxLength) {
10501130 }
10511131
10521132 // Find first sentence boundary (., !, ?)
1053- int [] boundaries = {text .indexOf (". " ), text .indexOf (".\n " ), text .indexOf ("! " ), text .indexOf ("? " )};
1054- int firstSentenceEnd = -1 ;
1055- for (int boundary : boundaries ) {
1056- if (boundary != -1 && (firstSentenceEnd == -1 || boundary < firstSentenceEnd )) {
1057- firstSentenceEnd = boundary ;
1058- }
1059- }
1133+ int firstSentenceEnd = findFirstSentenceBoundary (text );
10601134
10611135 // Return first sentence if within reasonable length
10621136 if (firstSentenceEnd != -1 && firstSentenceEnd < maxLength ) {
@@ -1072,12 +1146,67 @@ private static String getFirstSentenceOrLimit(String text, int maxLength) {
10721146
10731147 return text .trim ();
10741148 }
1149+
1150+ /**
1151+ * Find the first sentence boundary in text
1152+ */
1153+ private static int findFirstSentenceBoundary (String text ) {
1154+ int [] boundaries = {text .indexOf (". " ), text .indexOf (".\n " ), text .indexOf ("! " ), text .indexOf ("? " )};
1155+ int result = -1 ;
1156+ for (int boundary : boundaries ) {
1157+ if (boundary != -1 && (result == -1 || boundary < result )) {
1158+ result = boundary ;
1159+ }
1160+ }
1161+ return result ;
1162+ }
1163+
1164+ /**
1165+ * Find the best breakpoint for truncating text within a range
1166+ */
1167+ private static int findBestBreakpoint (String text , int minPos , int maxPos ) {
1168+ int [] boundaries = {
1169+ text .indexOf (". " , minPos ),
1170+ text .indexOf (".\n " , minPos ),
1171+ text .indexOf ("! " , minPos ),
1172+ text .indexOf ("? " , minPos )
1173+ };
1174+
1175+ int result = -1 ;
1176+ for (int boundary : boundaries ) {
1177+ if (boundary != -1 && boundary < maxPos && (result == -1 || boundary < result )) {
1178+ result = boundary ;
1179+ }
1180+ }
1181+ return result ;
1182+ }
10751183
10761184 /**
1077- * Extract summary description from field JavaDoc
1185+ * Extract field JavaDoc content directly for LLM consumption.
1186+ * Returns cleaned JavaDoc without artificial truncation - let LLM understand the full context.
10781187 */
10791188 private static String extractFieldJavaDocSummary (org .eclipse .jdt .core .IField field ) {
1080- return extractJavaDocSummaryFromElement (field );
1189+ try {
1190+ org .eclipse .jdt .core .ISourceRange javadocRange = field .getJavadocRange ();
1191+ if (javadocRange == null ) {
1192+ return "" ;
1193+ }
1194+
1195+ String rawJavadoc = field .getCompilationUnit ().getSource ()
1196+ .substring (javadocRange .getOffset (), javadocRange .getOffset () + javadocRange .getLength ());
1197+
1198+ if (!isNotEmpty (rawJavadoc )) {
1199+ return "" ;
1200+ }
1201+
1202+ // Just clean and return - let LLM understand the full context
1203+ String cleaned = cleanJavadocComment (rawJavadoc );
1204+ cleaned = removeHtmlTags (cleaned );
1205+ return convertHtmlEntities (cleaned );
1206+
1207+ } catch (Exception e ) {
1208+ return "" ;
1209+ }
10811210 }
10821211
10831212 /**
@@ -1094,8 +1223,6 @@ public static String generateFieldSignature(org.eclipse.jdt.core.IField field) {
10941223 return generateFieldSignatureInternal (field , false );
10951224 }
10961225
1097-
1098-
10991226 /**
11001227 * Convert JDT type signature to human-readable format
11011228 */
@@ -1228,30 +1355,7 @@ private static String simplifyTypeName(String qualifiedName) {
12281355 return lastDot == -1 ? qualifiedName : qualifiedName .substring (lastDot + 1 );
12291356 }
12301357
1231- /**
1232- * Unified JavaDoc summary extractor for methods and fields
1233- */
1234- private static String extractJavaDocSummaryFromElement (org .eclipse .jdt .core .IMember element ) {
1235- try {
1236- org .eclipse .jdt .core .ISourceRange javadocRange = element .getJavadocRange ();
1237- if (javadocRange == null ) {
1238- return "" ;
1239- }
1240-
1241- String rawJavadoc = element .getCompilationUnit ().getSource ()
1242- .substring (javadocRange .getOffset (), javadocRange .getOffset () + javadocRange .getLength ());
1243-
1244- if (rawJavadoc == null || rawJavadoc .isEmpty ()) {
1245- return "" ;
1246- }
1247-
1248- String cleaned = cleanJavadocComment (rawJavadoc );
1249- String description = extractJavadocDescription (cleaned );
1250- return getFirstSentenceOrLimit (description , 120 );
1251- } catch (Exception e ) {
1252- return "" ;
1253- }
1254- }
1358+
12551359
12561360 /**
12571361 * Unified method signature generator (handles both source and binary types)
0 commit comments