@@ -20,7 +20,7 @@
 
 
 class BaseSearcher:
-    _doc_id_counter = itertools.count(100000000)
+    _doc_id_counter = None  # Will be initialized per process
     MP_CONTEXT = None
 
     def __init__(self, host, connection_params, search_params):
@@ -67,15 +67,22 @@ def _search_one(cls, query, top: Optional[int] = None):
         precision = len(ids.intersection(query.expected_result[:top])) / top
         return precision, end - start
 
+    @classmethod
+    def _get_doc_id_counter(cls):
+        if cls._doc_id_counter is None:
+            # Use the process ID to give each worker a unique starting point:
+            # each process counts from 1000000000 + (pid % 1000) * 1000000
+            process_id = os.getpid()
+            start_offset = 1000000000 + (process_id % 1000) * 1000000
+            cls._doc_id_counter = itertools.count(start_offset)
+        return cls._doc_id_counter
+
     @classmethod
     def _insert_one(cls, query):
         start = time.perf_counter()
 
-        # Generate unique doc_id here
-        doc_id = next(cls._doc_id_counter)
-
-        # Debug logging to verify inserts are happening
-        #print(f"DEBUG: Inserting vector with doc_id={doc_id}")
+        # Generate a unique doc_id with the process-safe counter
+        doc_id = next(cls._get_doc_id_counter())
 
         cls.insert_one(str(doc_id), query.vector, query.meta_conditions)
         end = time.perf_counter()
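Outside the diff, here is a minimal standalone sketch of the idea behind this change, with hypothetical names (IdGenerator, worker): each process lazily creates its own itertools.count starting at a pid-derived offset, so doc_ids generated by different workers fall into disjoint ranges, assuming each worker inserts fewer than 1,000,000 documents and no two worker pids collide modulo 1000.

# Standalone sketch (hypothetical names), illustrating the per-process counter.
import itertools
import os
from multiprocessing import Pool


class IdGenerator:
    _doc_id_counter = None  # one counter per process, created lazily

    @classmethod
    def next_id(cls):
        if cls._doc_id_counter is None:
            # Same scheme as the patch: pid-derived starting offset
            start_offset = 1000000000 + (os.getpid() % 1000) * 1000000
            cls._doc_id_counter = itertools.count(start_offset)
        return next(cls._doc_id_counter)


def worker(n_inserts):
    # Each worker process draws ids from its own pid-derived range
    return os.getpid(), [IdGenerator.next_id() for _ in range(n_inserts)]


if __name__ == "__main__":
    with Pool(processes=4) as pool:
        for pid, ids in pool.map(worker, [3, 3, 3, 3]):
            print(pid, ids)

Running the sketch prints a short block of ids per worker; ids from different pids start at different offsets, which is the collision the counter-per-process change is meant to avoid.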