Skip to content

Commit ab2b5d0

Browse files
Merge branch 'master' into fix_session_pool_del_before_lock
2 parents 0919c79 + 78cb18d commit ab2b5d0

File tree

12 files changed

+405
-93
lines changed

12 files changed

+405
-93
lines changed

.github/workflows/deploy_release.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@ on:
77

88
jobs:
99
ci:
10-
runs-on: ubuntu-22.04
10+
runs-on: ubuntu-latest
1111

1212
steps:
13-
- uses: actions/checkout@v3
14-
- uses: pdm-project/setup-pdm@v3
13+
- uses: actions/checkout@v5
14+
- uses: pdm-project/setup-pdm@v4
1515
with:
16-
python-version: '3.x'
1716
cache: true
1817
- name: Install dependencies
1918
run: pdm install

.github/workflows/run_test.yaml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ on:
2121
jobs:
2222
ci-pip-install-from-source:
2323
# This is to verify the setup.py as a mitigation to retain python 3.6.2+ capability
24-
runs-on: ubuntu-20.04
24+
runs-on: ubuntu-22.04
2525
strategy:
2626
max-parallel: 2
2727
matrix:
28-
python-version: [3.6, 3.7]
28+
python-version: [3.8, 3.11]
2929
steps:
30-
- uses: actions/checkout@v3
30+
- uses: actions/checkout@v5
3131
- name: Set up Python ${{ matrix.python-version }}
32-
uses: actions/setup-python@v4
32+
uses: actions/setup-python@v6
3333
with:
3434
python-version: ${{ matrix.python-version }}
3535
- name: Install nebulagraph-python from source and test dependencies
@@ -52,7 +52,7 @@ jobs:
5252
strategy:
5353
max-parallel: 2
5454
matrix:
55-
python-version: [3.8, 3.9, '3.10', 3.11]
55+
python-version: ['3.10', 3.11, 3.12, 3.13]
5656

5757
steps:
5858
- name: Maximize runner space
@@ -63,9 +63,9 @@ jobs:
6363
remove-android: 'true'
6464
remove-haskell: 'true'
6565

66-
- uses: actions/checkout@v3
66+
- uses: actions/checkout@v5
6767
- name: Set up Python ${{ matrix.python-version }}
68-
uses: pdm-project/setup-pdm@v3
68+
uses: pdm-project/setup-pdm@v4
6969
with:
7070
python-version: ${{ matrix.python-version }}
7171
cache: true
@@ -94,14 +94,14 @@ jobs:
9494
files: coverage.xml
9595

9696
example-test:
97-
runs-on: ubuntu-latest
97+
runs-on: ubuntu-22.04
9898
strategy:
9999
matrix:
100-
python-version: [3.11, 3.12]
100+
python-version: ['3.10', 3.11, 3.12, 3.13]
101101
steps:
102-
- uses: actions/checkout@v3
102+
- uses: actions/checkout@v5
103103
- name: Set up Python ${{ matrix.python-version }}
104-
uses: pdm-project/setup-pdm@v3
104+
uses: pdm-project/setup-pdm@v4
105105
with:
106106
python-version: ${{ matrix.python-version }}
107107
cache: true

nebula3/gclient/net/base.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,6 @@ def _cast_value(value: Any) -> Value:
8484
casted_value.set_sVal(value)
8585
elif isinstance(value, float):
8686
casted_value.set_fVal(value)
87-
elif isinstance(value, datetime.date):
88-
date_value = Date(year=value.year, month=value.month, day=value.day)
89-
casted_value.set_dVal(date_value)
90-
elif isinstance(value, datetime.time):
91-
time_value = Time(
92-
hour=value.hour,
93-
minute=value.minute,
94-
sec=value.second,
95-
microsec=value.microsecond,
96-
)
97-
casted_value.set_tVal(time_value)
9887
elif isinstance(value, datetime.datetime):
9988
datetime_value = DateTime(
10089
year=value.year,
@@ -106,6 +95,17 @@ def _cast_value(value: Any) -> Value:
10695
microsec=value.microsecond,
10796
)
10897
casted_value.set_dtVal(datetime_value)
98+
elif isinstance(value, datetime.date):
99+
date_value = Date(year=value.year, month=value.month, day=value.day)
100+
casted_value.set_dVal(date_value)
101+
elif isinstance(value, datetime.time):
102+
time_value = Time(
103+
hour=value.hour,
104+
minute=value.minute,
105+
sec=value.second,
106+
microsec=value.microsecond,
107+
)
108+
casted_value.set_tVal(time_value)
109109
# TODO: add support for GeoSpatial
110110
elif isinstance(value, list):
111111
byte_list = []

nebula3/sclient/GraphStorageClient.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
The client to scan vertex and edge from storage,
1111
the return data is from the graph database
1212
"""
13+
1314
import sys
15+
import concurrent.futures
1416

1517
from nebula3.sclient.ScanResult import ScanResult
1618
from nebula3.sclient.net import GraphStorageConnection
@@ -192,6 +194,69 @@ def scan_vertex_with_part(
192194
partial_success,
193195
)
194196

197+
# TODO: 1. Native async or PyO3
#       2. Error handling
#       3. Statistical indicators
def scan_vertex_async(
    self,
    space_name,
    tag_name,
    prop_names=None,
    start_time=DEFAULT_START_TIME,
    end_time=DEFAULT_END_TIME,
    where=None,
    only_latest_version=False,
    enable_read_from_follower=True,
    partial_success=False,
    batch_size=1000,
    max_workers=8,
):
    """Scan vertices from all partitions concurrently, streaming batches as they complete.

    Each partition is scanned in its own worker thread via
    ``scan_vertex_with_part``; results are yielded in completion order,
    so batches from different partitions may interleave.

    :param space_name: the space name
    :param tag_name: the tag name
    :param prop_names: if given empty (or None), return all properties
    :param start_time: the min version of vertex
    :param end_time: the max version of vertex
    :param where: now is unsupported
    :param only_latest_version: when storage enables multi versions and
                                only_latest_version is true, only return the
                                latest version; when storage disables multi
                                versions, just use the default value.
    :param enable_read_from_follower: if set to false, forbid follower read
    :param partial_success: if set true, when partial success, it will continue until finish
    :param batch_size: the number of vertices per batch (passed to scan_vertex_with_part as the limit)
    :param max_workers: number of concurrent threads
    :yield: (part_id, VertexResult) for each batch of data
    """
    # Avoid the shared mutable default-argument pitfall.
    if prop_names is None:
        prop_names = []
    part_leaders = self._meta_cache.get_part_leaders(space_name)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # One scan task per partition; only the partition ids are needed here.
        future_to_part = {
            executor.submit(
                self.scan_vertex_with_part,
                space_name,
                part,
                tag_name,
                prop_names,
                batch_size,  # the limit passed to scan_vertex_with_part
                start_time,
                end_time,
                where,
                only_latest_version,
                enable_read_from_follower,
                partial_success,
            ): part
            for part in part_leaders
        }

        for future in concurrent.futures.as_completed(future_to_part):
            part = future_to_part[future]
            scan_result = future.result()  # ScanResult; re-raises scan errors
            while scan_result is not None and scan_result.has_next():
                yield part, scan_result.next()
259+
195260
def _scan_vertex(
196261
self,
197262
space_name,
@@ -337,6 +402,65 @@ def scan_edge_with_part(
337402
partial_success,
338403
)
339404

405+
def scan_edge_async(
    self,
    space_name,
    edge_name,
    prop_names=None,
    start_time=DEFAULT_START_TIME,
    end_time=DEFAULT_END_TIME,
    where=None,
    only_latest_version=False,
    enable_read_from_follower=True,
    partial_success=False,
    batch_size=1000,
    max_workers=8,
):
    """Scan edges from all partitions concurrently, streaming batches as they complete.

    Each partition is scanned in its own worker thread via
    ``scan_edge_with_part``; results are yielded in completion order,
    so batches from different partitions may interleave.

    :param space_name: the space name
    :param edge_name: the edge name
    :param prop_names: if given empty (or None), return all properties
    :param start_time: the min version of edge
    :param end_time: the max version of edge
    :param where: now is unsupported
    :param only_latest_version: when storage enables multi versions and
                                only_latest_version is true, only return the
                                latest version; when storage disables multi
                                versions, just use the default value.
    :param enable_read_from_follower: if set to false, forbid follower read
    :param partial_success: if set true, when partial success, it will continue until finish
    :param batch_size: the number of edges per batch (passed to scan_edge_with_part as the limit)
    :param max_workers: number of concurrent threads
    :yield: (part_id, EdgeResult) for each batch of data
    """
    # Avoid the shared mutable default-argument pitfall.
    if prop_names is None:
        prop_names = []
    part_leaders = self._meta_cache.get_part_leaders(space_name)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # One scan task per partition; only the partition ids are needed here.
        future_to_part = {
            executor.submit(
                self.scan_edge_with_part,
                space_name,
                part,
                edge_name,
                prop_names,
                batch_size,  # the limit passed to scan_edge_with_part
                start_time,
                end_time,
                where,
                only_latest_version,
                enable_read_from_follower,
                partial_success,
            ): part
            for part in part_leaders
        }

        for future in concurrent.futures.as_completed(future_to_part):
            part = future_to_part[future]
            scan_result = future.result()  # ScanResult; re-raises scan errors
            while scan_result is not None and scan_result.has_next():
                yield part, scan_result.next()
463+
340464
def _scan_edge(
341465
self,
342466
space_name,

nebula3/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .hash import hash

nebula3/utils/hash.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# nebula3/hash.py
from __future__ import annotations

_M: int = 0xC6A4A7935BD1E995
_R: int = 47
_MASK64: int = (1 << 64) - 1


def _read_u64_le(buf: bytes) -> int:
    """Decode up to 8 bytes of *buf* as a little-endian unsigned integer."""
    return int.from_bytes(buf, byteorder="little", signed=False)


def hash(data: bytes | str, seed: int = 0xC70F6907) -> int:
    """Compute the 64-bit MurmurHash2 (MurmurHash64A) of *data*.

    :param data: value to hash; str is UTF-8 encoded, bytes/bytearray is used as-is
    :param seed: hash seed, defaults to 0xC70F6907
    :return: the hash as a signed 64-bit Python int
    :raises TypeError: if *data* is not str, bytes, or bytearray
    """
    if isinstance(data, str):
        raw = data.encode("utf-8")
    elif isinstance(data, (bytes, bytearray)):
        raw = bytes(data)
    else:
        raise TypeError("Input must be str, bytes, or bytearray")

    size = len(raw)
    h = (seed ^ ((size * _M) & _MASK64)) & _MASK64

    # Mix every full 8-byte little-endian word into the accumulator.
    word_end = size - size % 8
    for pos in range(0, word_end, 8):
        k = _read_u64_le(raw[pos : pos + 8])
        k = (k * _M) & _MASK64
        k ^= k >> _R
        k = (k * _M) & _MASK64
        h = ((h ^ k) * _M) & _MASK64

    # Fold in the remaining 1..7 tail bytes, if any.
    tail = raw[word_end:]
    if tail:
        h = ((h ^ _read_u64_le(tail)) * _M) & _MASK64

    # Final avalanche.
    h ^= h >> _R
    h = (h * _M) & _MASK64
    h ^= h >> _R

    # Reinterpret the unsigned result as a signed 64-bit integer.
    return h - (1 << 64) if h & (1 << 63) else h

0 commit comments

Comments
 (0)