Skip to content

Commit 44d3e2e

Browse files
committed
Removed dependency on NumPy - closes #146
1 parent 5bc9128 commit 44d3e2e

18 files changed

Lines changed: 177 additions & 90 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
- Changed `vector` type to return `Vector` object instead of NumPy array
55
- Removed `utils` package (use top-level `pgvector` package instead)
66
- Removed re-exported classes (use top-level `pgvector` package instead)
7+
- Removed dependency on NumPy
78
- Dropped support for Python < 3.10
89
- Dropped support for SQLAlchemy < 2
910

pgvector/bit.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
11
from __future__ import annotations
2-
import numpy as np
32
from struct import pack, unpack_from
43
from warnings import warn
54

5+
try:
6+
import numpy as np
7+
except ImportError:
8+
np = None
9+
610

711
class Bit:
812
def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.dtype[np.bool | np.uint8]]) -> None:
@@ -30,7 +34,7 @@ def bit_value(v: bool) -> str:
3034
data = int(value, 2).to_bytes(len(value) // 8, byteorder='big')
3135
except ValueError:
3236
raise ValueError('expected bit string')
33-
elif isinstance(value, np.ndarray):
37+
elif np is not None and isinstance(value, np.ndarray):
3438
if value.dtype != np.bool:
3539
# skip warning for result of np.unpackbits
3640
if value.dtype != np.uint8 or np.any(value > 1):
@@ -63,7 +67,8 @@ def _length(self):
6367
return length
6468

6569
def to_list(self) -> list[bool]:
66-
return self.to_numpy().tolist()
70+
# TODO improve
71+
return [v != '0' for v in self.to_text()]
6772

6873
def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.bool]]:
6974
return np.unpackbits(np.frombuffer(self._value[4:], dtype=np.uint8), count=self._length()).astype(bool)

pgvector/halfvec.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
from __future__ import annotations
2-
import numpy as np
32
import struct
43

4+
try:
5+
import numpy as np
6+
except ImportError:
7+
np = None
8+
59

610
class HalfVector:
711
def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None:
@@ -11,7 +15,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float
1115
self._value = struct.pack(f'>HH{dim}e', dim, 0, *value)
1216
except struct.error:
1317
raise ValueError('expected list[float]')
14-
elif isinstance(value, np.ndarray):
18+
elif np is not None and isinstance(value, np.ndarray):
1519
if value.ndim != 1:
1620
raise ValueError('expected ndim to be 1')
1721

pgvector/sparsevec.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
11
from __future__ import annotations
2-
import numpy as np
32
from struct import pack, unpack_from
43
from typing import Any, overload
54

5+
try:
6+
import numpy as np
7+
except ImportError:
8+
np = None
9+
610
NO_DEFAULT = object()
711

812

pgvector/vector.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
from __future__ import annotations
2-
import numpy as np
32
import struct
43

4+
try:
5+
import numpy as np
6+
except ImportError:
7+
np = None
8+
59

610
class Vector:
711
def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None:
@@ -11,7 +15,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float
1115
self._value = struct.pack(f'>HH{dim}f', dim, 0, *value)
1216
except struct.error:
1317
raise ValueError('expected list[float]')
14-
elif isinstance(value, np.ndarray):
18+
elif np is not None and isinstance(value, np.ndarray):
1519
if value.ndim != 1:
1620
raise ValueError('expected ndim to be 1')
1721

pyproject.toml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ authors = [
1313
license = "MIT"
1414
requires-python = ">= 3.10"
1515
dependencies = [
16-
"numpy"
1716
]
1817

1918
[project.urls]
@@ -29,10 +28,13 @@ dev = [
2928
"psycopg2-binary",
3029
"pytest",
3130
"pytest-asyncio",
32-
"scipy",
3331
"SQLAlchemy[asyncio]>=2",
3432
"sqlmodel>=0.0.12"
3533
]
34+
dev-optional = [
35+
"numpy",
36+
"scipy"
37+
]
3638

3739
[tool.pytest.ini_options]
3840
asyncio_mode = "auto"

tests/test_asyncpg.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import asyncpg
2-
import numpy as np
32
from pgvector import HalfVector, SparseVector, Vector
43
from pgvector.asyncpg import register_vector
54
import pytest
@@ -16,7 +15,7 @@ async def test_vector(self):
1615
await register_vector(conn)
1716

1817
embedding = Vector([1.5, 2, 3])
19-
embedding2 = np.array([4.5, 5, 6])
18+
embedding2 = [4.5, 5, 6]
2019
await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2)
2120

2221
res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id")
@@ -111,7 +110,7 @@ async def test_vector_array(self):
111110
embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])]
112111
await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES ($1)", embeddings)
113112

114-
embeddings2 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])]
113+
embeddings2 = [[1.5, 2, 3], [4.5, 5, 6]]
115114
await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1])
116115

117116
res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id")
@@ -133,7 +132,7 @@ async def init(conn):
133132
await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))')
134133

135134
embedding = Vector([1.5, 2, 3])
136-
embedding2 = np.array([1.5, 2, 3])
135+
embedding2 = [1.5, 2, 3]
137136
await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2)
138137

139138
res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id")

tests/test_bit.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
1-
import numpy as np
21
from pgvector import Bit
32
import pytest
43

4+
try:
5+
import numpy as np
6+
except ImportError:
7+
np = None
8+
59

610
class TestBit:
711
def test_list(self):
@@ -29,20 +33,24 @@ def test_bytes(self):
2933
assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000'
3034
assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000'
3135

36+
@pytest.mark.skipif(np is None, reason='NumPy required')
3237
def test_ndarray(self):
3338
arr = np.array([True, False, True])
3439
assert Bit(arr).to_list() == [True, False, True]
3540
assert np.array_equal(Bit(arr).to_numpy(), arr)
3641

42+
@pytest.mark.skipif(np is None, reason='NumPy required')
3743
def test_ndarray_unpackbits(self):
3844
arr = np.unpackbits(np.array([254, 7, 0], dtype=np.uint8))
3945
assert Bit(arr).to_text() == '111111100000011100000000'
4046

47+
@pytest.mark.skipif(np is None, reason='NumPy required')
4148
def test_ndarray_uint8(self):
4249
arr = np.array([254, 7, 0], dtype=np.uint8)
4350
with pytest.warns(UserWarning, match='expected elements to be boolean'):
4451
assert Bit(arr).to_text() == '110'
4552

53+
@pytest.mark.skipif(np is None, reason='NumPy required')
4654
def test_ndarray_uint16(self):
4755
arr = np.array([254, 7, 0], dtype=np.uint16)
4856
with pytest.warns(UserWarning, match='expected elements to be boolean'):

tests/test_django.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,17 @@
1111
from django.db.migrations.loader import MigrationLoader
1212
from django.forms import ModelForm
1313
from math import sqrt
14-
import numpy as np
1514
import os
1615
import pgvector.django
1716
from pgvector import HalfVector, SparseVector, Vector
1817
from pgvector.django import VectorExtension, VectorField, HalfVectorField, BitField, SparseVectorField, IvfflatIndex, HnswIndex, L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance
1918
from unittest import mock
2019

20+
try:
21+
import numpy as np
22+
except ImportError:
23+
np = None
24+
2125
settings.configure(
2226
DATABASES={
2327
'default': {
@@ -458,7 +462,7 @@ def test_missing(self):
458462
assert Item.objects.first().sparse_embedding is None
459463

460464
def test_vector_array(self):
461-
Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])]).save()
465+
Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])]).save()
462466

463467
with connection.cursor() as cursor:
464468
from pgvector.psycopg import register_vector

tests/test_half_vector.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
1-
import numpy as np
21
from pgvector import HalfVector
32
import pytest
43
from struct import pack
54

5+
try:
6+
import numpy as np
7+
except ImportError:
8+
np = None
9+
610

711
class TestHalfVector:
812
def test_list(self):
@@ -21,6 +25,7 @@ def test_list_list(self):
2125
HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type]
2226
assert str(error.value) == 'expected list[float]'
2327

28+
@pytest.mark.skipif(np is None, reason='NumPy required')
2429
def test_ndarray(self):
2530
arr = np.array([1, 2, 3])
2631
assert HalfVector(arr).to_list() == [1, 2, 3]
@@ -42,6 +47,7 @@ def test_equality(self):
4247
def test_dimensions(self):
4348
assert HalfVector([1, 2, 3]).dimensions() == 3
4449

50+
@pytest.mark.skipif(np is None, reason='NumPy required')
4551
def test_to_numpy_readonly(self):
4652
arr = HalfVector([1, 2, 3]).to_numpy()
4753
with pytest.raises(ValueError) as error:
@@ -51,11 +57,13 @@ def test_to_numpy_readonly(self):
5157
def test_from_text(self):
5258
vec = HalfVector.from_text('[1.5,2,3]')
5359
assert vec.to_list() == [1.5, 2, 3]
54-
assert np.array_equal(vec.to_numpy(), [1.5, 2, 3])
60+
if np is not None:
61+
assert np.array_equal(vec.to_numpy(), [1.5, 2, 3])
5562

5663
def test_from_binary(self):
5764
data = pack('>HH3e', 3, 0, 1.5, 2, 3)
5865
vec = HalfVector.from_binary(data)
5966
assert vec.to_list() == [1.5, 2, 3]
60-
assert np.array_equal(vec.to_numpy(), [1.5, 2, 3])
67+
if np is not None:
68+
assert np.array_equal(vec.to_numpy(), [1.5, 2, 3])
6169
assert vec.to_binary() == data

0 commit comments

Comments
 (0)