Skip to content

Commit 0836278

Browse files
authored
Merge pull request #74 from BruinGrowly/cursor/review-code-for-issues-and-improvements-gemini-3-pro-preview-4500
Review code for issues and improvements
2 parents ae291a5 + 8902656 commit 0836278

File tree

3 files changed

+153
-73
lines changed

3 files changed

+153
-73
lines changed

harmonizer/config.py

Lines changed: 95 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import os
77
from dataclasses import dataclass, field
8-
from typing import Any, Dict, List
8+
from typing import Any, Dict, List, Optional, Tuple
99

1010
# Try to import tomli for TOML parsing
1111
try:
@@ -48,48 +48,55 @@ class HarmonizerConfig:
4848

4949
# Analysis
5050
complexity_weight: float = 0.2 # For dynamic simulation
51+
custom_vocabulary: Dict[str, str] = field(default_factory=dict)
52+
source_path: Optional[str] = None
53+
root_dir: Optional[str] = None
5154

5255

5356
class ConfigLoader:
57+
_YAML_FILENAMES = (
58+
".harmonizer.yml",
59+
".harmonizer.yaml",
60+
"harmonizer.yml",
61+
"harmonizer.yaml",
62+
)
63+
5464
@staticmethod
def load(target_dir: str = ".", search_parents: bool = False) -> HarmonizerConfig:
    """
    Load configuration for ``target_dir``.

    Priority within a directory (the first file found wins):
    1. .harmonizer.yml / .harmonizer.yaml / harmonizer.yml / harmonizer.yaml
    2. pyproject.toml (``[tool.harmonizer]`` table)
    3. Defaults

    Args:
        target_dir: Directory in which to look for a configuration file.
        search_parents: When True, keep walking up through the parent
            directories until a configuration file is found or the
            filesystem root is reached.

    Returns:
        A HarmonizerConfig. On a successful load ``source_path`` is the
        file that was read and ``root_dir`` is its directory. When no
        file is found, or the file cannot be loaded, a default config is
        returned with ``root_dir`` set to the absolute ``target_dir`` and
        ``source_path`` left as None.
    """
    import sys  # local import: only needed for the warning below

    config = HarmonizerConfig()
    config.root_dir = os.path.abspath(target_dir)

    config_path, config_type = ConfigLoader._locate_config_path(target_dir, search_parents)
    if not config_path:
        return config

    try:
        if config_type == "toml":
            ConfigLoader._load_from_pyproject(config, config_path)
        else:
            ConfigLoader._load_from_yaml(config, config_path)
    except Exception as exc:  # pragma: no cover - defensive logging
        # Warn on stderr so stdout stays clean for machine-readable output.
        print(f"Warning: Failed to load {config_path}: {exc}", file=sys.stderr)
        # The loader may have applied some keys before failing; hand back
        # pristine defaults rather than a half-updated configuration.
        fallback = HarmonizerConfig()
        fallback.root_dir = os.path.abspath(target_dir)
        return fallback

    config.source_path = config_path
    config.root_dir = os.path.dirname(config_path)
    return config
9292

93+
@staticmethod
def load_nearest(start_dir: str = ".") -> HarmonizerConfig:
    """
    Load the configuration closest to ``start_dir``.

    Delegates to ``ConfigLoader.load`` with parent-directory search
    enabled, so the lookup begins in ``start_dir`` and continues upward
    until a config file is found.
    """
    nearest = ConfigLoader.load(target_dir=start_dir, search_parents=True)
    return nearest
99+
93100
@staticmethod
94101
def _update_config(config: HarmonizerConfig, data: Dict[str, Any]):
95102
"""Update config object with dictionary data"""
@@ -102,14 +109,69 @@ def _update_config(config: HarmonizerConfig, data: Dict[str, Any]):
102109
if "min_density" in t:
103110
config.min_density = float(t["min_density"])
104111

112+
if "analysis" in data:
113+
a = data["analysis"]
114+
if "complexity_weight" in a:
115+
config.complexity_weight = float(a["complexity_weight"])
116+
105117
if "paths" in data:
106118
p = data["paths"]
107119
if "exclude" in p:
108-
config.exclude_patterns = p["exclude"]
120+
config.exclude_patterns = list(p["exclude"])
109121
if "report" in p:
110122
config.report_output = p["report"]
111123

112-
if "analysis" in data:
113-
a = data["analysis"]
114-
if "complexity_weight" in a:
115-
config.complexity_weight = float(a["complexity_weight"])
124+
if "exclude" in data:
125+
config.exclude_patterns = list(data["exclude"])
126+
127+
if "custom_vocabulary" in data:
128+
custom_vocab = data.get("custom_vocabulary") or {}
129+
# Merge so later sources can override defaults
130+
config.custom_vocabulary.update(custom_vocab)
131+
132+
@staticmethod
def _locate_config_path(
    start_dir: str, search_parents: bool
) -> Tuple[Optional[str], Optional[str]]:
    """
    Find the configuration file that applies to ``start_dir``.

    In each directory the names in ``_YAML_FILENAMES`` are tried in order
    (only when PyYAML is importable), followed by ``pyproject.toml`` (only
    when a TOML parser is importable). A ``pyproject.toml`` counts as a
    Harmonizer config only if it carries a non-empty ``[tool.harmonizer]``
    table; otherwise an unrelated project file would end the parent search
    and hide a real config further up the tree.

    Args:
        start_dir: Directory where the search begins.
        search_parents: When True, continue into parent directories until
            a match is found or the filesystem root is reached.

    Returns:
        ``(path, "yaml")`` or ``(path, "toml")`` for the first match, or
        ``(None, None)`` when nothing was found.
    """
    current_dir = os.path.abspath(start_dir)
    while True:
        if yaml:
            for filename in ConfigLoader._YAML_FILENAMES:
                candidate = os.path.join(current_dir, filename)
                if os.path.isfile(candidate):
                    return candidate, "yaml"

        toml_path = os.path.join(current_dir, "pyproject.toml")
        if tomli and os.path.isfile(toml_path):
            try:
                with open(toml_path, "rb") as handle:
                    tool_table = tomli.load(handle).get("tool", {})
            except (OSError, ValueError):
                # Unreadable or malformed TOML (decode errors subclass
                # ValueError): return it anyway so the caller's loader
                # reports the failure instead of it being silently skipped.
                return toml_path, "toml"
            if isinstance(tool_table, dict) and tool_table.get("harmonizer"):
                return toml_path, "toml"

        if not search_parents:
            break

        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # dirname() of the filesystem root is the root itself.
            break
        current_dir = parent_dir

    return None, None
156+
157+
@staticmethod
def _load_from_yaml(config: HarmonizerConfig, path: str) -> None:
    """
    Apply the settings in the YAML file at ``path`` to ``config`` in place.

    An empty document leaves ``config`` untouched.

    Raises:
        RuntimeError: If PyYAML could not be imported.
        ValueError: If the document's top level is not a mapping.

    Errors from opening or parsing the file propagate to the caller.
    """
    if not yaml:
        raise RuntimeError("PyYAML is not installed")

    with open(path, "r", encoding="utf-8") as handle:
        data = yaml.safe_load(handle) or {}

    # A list or scalar at the top level cannot hold named settings; fail
    # loudly instead of treating the file as loaded with nothing applied.
    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a mapping at the top level of {path}, "
            f"got {type(data).__name__}"
        )

    if data:
        ConfigLoader._update_config(config, data)
166+
167+
@staticmethod
def _load_from_pyproject(config: HarmonizerConfig, path: str) -> None:
    """
    Apply the ``[tool.harmonizer]`` table of the TOML file at ``path`` to
    ``config`` in place.

    A missing or empty table leaves ``config`` untouched.

    Raises:
        RuntimeError: If no TOML parser could be imported.
    """
    if not tomli:
        raise RuntimeError("tomli/tomllib is not available for TOML parsing")

    # TOML parsers require the file to be opened in binary mode.
    with open(path, "rb") as stream:
        parsed = tomli.load(stream)

    harmonizer_table = parsed.get("tool", {}).get("harmonizer", {})
    if not harmonizer_table:
        return
    ConfigLoader._update_config(config, harmonizer_table)

harmonizer/divine_invitation_engine_V2.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,24 @@ class SemanticResult:
7676
class VocabularyManager:
7777
"""Optimized vocabulary management with caching"""
7878

79-
def __init__(self, custom_vocabulary: Optional[Dict[str, str]] = None):
80-
self._keyword_map: Dict[str, Dimension] = {}
79+
_BASE_KEYWORD_MAP: Dict[str, Dimension] = {}
80+
_BASE_ICE_DIMENSION_MAP: Dict[Dimension, Dimension] = {}
81+
82+
def __init__(self, custom_vocabulary: Optional[Dict[str, str]] = None, quiet: bool = True):
83+
self._quiet = quiet
8184
self._word_cache: Dict[str, Tuple[Coordinates, int]] = {}
82-
self._ice_dimension_map: Dict[Dimension, Dimension] = {}
83-
self._build_complete_vocabulary()
85+
86+
if VocabularyManager._BASE_KEYWORD_MAP:
87+
# Reuse previously built vocabulary to avoid repeated startup cost
88+
self._keyword_map = dict(VocabularyManager._BASE_KEYWORD_MAP)
89+
self._ice_dimension_map = dict(VocabularyManager._BASE_ICE_DIMENSION_MAP)
90+
else:
91+
self._keyword_map = {}
92+
self._ice_dimension_map = {}
93+
self._build_complete_vocabulary()
94+
VocabularyManager._BASE_KEYWORD_MAP = dict(self._keyword_map)
95+
VocabularyManager._BASE_ICE_DIMENSION_MAP = dict(self._ice_dimension_map)
96+
8497
if custom_vocabulary:
8598
self._apply_custom_vocabulary(custom_vocabulary)
8699

@@ -388,12 +401,13 @@ def _build_complete_vocabulary(self) -> None:
388401
self._keyword_map[word] = dimension
389402

390403
# Print to stderr to avoid breaking JSON output on stdout
391-
import sys
404+
if not self._quiet:
405+
import sys
392406

393-
print(
394-
f"VocabularyManager: Initialized with {len(self._keyword_map)} unique keywords.",
395-
file=sys.stderr,
396-
)
407+
print(
408+
f"VocabularyManager: Initialized with {len(self._keyword_map)} unique keywords.",
409+
file=sys.stderr,
410+
)
397411

398412
def analyze_text(self, text: str) -> Tuple[Coordinates, int]:
399413
"""Optimized text analysis with caching"""
@@ -899,7 +913,10 @@ def __init__(self, config: Optional[Dict] = None):
899913

900914
# Build core components
901915
custom_vocabulary = self.config.get("custom_vocabulary", {})
902-
self.vocabulary = VocabularyManager(custom_vocabulary=custom_vocabulary)
916+
verbose_vocab = bool(self.config.get("verbose_vocab"))
917+
self.vocabulary = VocabularyManager(
918+
custom_vocabulary=custom_vocabulary, quiet=not verbose_vocab
919+
)
903920
self.semantic_analyzer = SemanticAnalyzer(self.vocabulary, self.ANCHOR_POINT)
904921

905922
# Build specialized sub-engines

harmonizer/main.py

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,46 +34,37 @@
3434
import json # noqa: E402
3535
from typing import Dict, List, Tuple # noqa: E402
3636

37-
import yaml # noqa: E402
38-
3937
from harmonizer import divine_invitation_engine_V2 as dive # noqa: E402
4038
from harmonizer.ast_semantic_parser import AST_Semantic_Parser # noqa: E402
4139
from harmonizer.refactorer import Refactorer # noqa: E402
4240
from harmonizer.semantic_map import SemanticMapGenerator # noqa: E402
4341
from harmonizer.semantic_naming import SemanticNamingEngine # noqa: E402
42+
from harmonizer.config import ConfigLoader # noqa: E402
4443

4544
# --- CONFIGURATION LOADING ---
4645

4746

4847
def load_configuration() -> Dict:
    """
    Return the nearest Harmonizer configuration as a plain dictionary.

    The lookup starts in the current working directory and is delegated to
    ``ConfigLoader.load_nearest``. The result has the keys ``exclude``,
    ``custom_vocabulary``, ``thresholds`` and ``config_root``.
    """
    loaded = ConfigLoader.load_nearest(os.getcwd())

    thresholds = {
        "max_disharmony": loaded.max_disharmony,
        "max_imbalance": loaded.max_imbalance,
        "min_density": loaded.min_density,
    }

    settings: Dict = {}
    # Copies keep callers from mutating the loaded config object.
    settings["exclude"] = list(loaded.exclude_patterns)
    settings["custom_vocabulary"] = dict(loaded.custom_vocabulary)
    settings["thresholds"] = thresholds
    settings["config_root"] = loaded.root_dir if loaded.root_dir else os.getcwd()

    if loaded.source_path:
        # Informational message goes to stderr, not stdout.
        print(
            f"INFO: Loaded configuration from {loaded.source_path}",
            file=sys.stderr,
        )
    return settings
7768

7869

7970
# --- THE HARMONIZER APPLICATION ---
@@ -169,8 +160,9 @@ def _parse_code_to_ast(self, content: str, file_path: str) -> ast.AST:
169160

170161
def _analyze_all_functions(self, tree: ast.AST) -> Dict[str, Dict]:
171162
harmony_report = {}
163+
function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
172164
for node in ast.walk(tree):
173-
if isinstance(node, ast.FunctionDef):
165+
if isinstance(node, function_nodes):
174166
function_name = node.name
175167
docstring = ast.get_docstring(node)
176168
intent_concepts = self.parser.get_intent_concepts(function_name, docstring)
@@ -424,8 +416,17 @@ def validate_cli_arguments(args: argparse.Namespace, config: Dict) -> List[str]:
424416
invalid_files = []
425417
excluded_files = []
426418
exclude_patterns = config.get("exclude", [])
419+
config_root = config.get("config_root") or os.getcwd()
427420
for file_path in args.files:
428-
if any(fnmatch.fnmatch(file_path, pattern) for pattern in exclude_patterns):
421+
normalized_path = os.path.normpath(file_path)
422+
rel_path = os.path.normpath(os.path.relpath(normalized_path, config_root))
423+
basename = os.path.basename(normalized_path)
424+
if any(
425+
fnmatch.fnmatch(normalized_path, pattern)
426+
or fnmatch.fnmatch(rel_path, pattern)
427+
or fnmatch.fnmatch(basename, pattern)
428+
for pattern in exclude_patterns
429+
):
429430
excluded_files.append(file_path)
430431
continue
431432
if os.path.exists(file_path):

0 commit comments

Comments
 (0)