Skip to content

Commit

Permalink
Merge pull request #28 from upstash/new-features
Browse files Browse the repository at this point in the history
Implement new vector features
  • Loading branch information
CahidArda committed Jun 24, 2024
2 parents ba761be + bef5d61 commit 5e76f4b
Show file tree
Hide file tree
Showing 10 changed files with 1,006 additions and 108 deletions.
33 changes: 26 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,32 @@ There are a couple of ways of doing upserts:

```python
# as tuples, either of the form:
# - (id, vector, metadata, data)
# - (id, vector, metadata)
# - (id, vector)

index.upsert(
vectors=[
("id1", [0.1, 0.2], {"metadata_field": "metadata_value"}),
("id2", [0.3, 0.4]),
("id1", [0.1, 0.2], {"metadata_field": "metadata_value"}, "data-value"),
("id2", [0.2, 0.2], {"metadata_field": "metadata_value"}),
("id3", [0.3, 0.4]),
]
)
```

```python
# as dicts, either of the form:
# - {"id": id, "vector": vector, "metadata": metadata, "data": data}
# - {"id": id, "vector": vector, "metadata": metadata}
# - {"id": id, "vector": vector, "data": data}
# - {"id": id, "vector": vector}

index.upsert(
vectors=[
{"id": "id3", "vector": [0.1, 0.2], "metadata": {"field": "value"}},
{"id": "id4", "vector": [0.5, 0.6]},
{"id": "id4", "vector": [0.1, 0.2], "metadata": {"field": "value"}, "data": "value"},
{"id": "id5", "vector": [0.1, 0.2], "metadata": {"field": "value"}},
{"id": "id6", "vector": [0.1, 0.2], "data": "value"},
{"id": "id7", "vector": [0.5, 0.6]},
]
)
```
Expand All @@ -79,12 +85,15 @@ from upstash_vector import Vector
index.upsert(
vectors=[
Vector(id="id5", vector=[1, 2], metadata={"field": "value"}),
Vector(id="id6", vector=[6, 7]),
Vector(id="id6", vector=[1, 2], data="value"),
Vector(id="id7", vector=[6, 7]),
]
)
```

If the index is created with an embedding model, raw string data can be upserted.
In this case, the `data` field of the vector will also be set to the `data` passed
below, so that it can be accessed later.

```python
from upstash_vector import Data
Expand Down Expand Up @@ -121,6 +130,7 @@ res = index.query(
top_k=5,
include_vectors=False,
include_metadata=True,
include_data=True,
filter="metadata_f = 'metadata_v'"
)

Expand All @@ -131,6 +141,7 @@ for r in res:
r.score, # The similarity score of this vector to the query vector. Higher is more similar.
r.vector, # The value of the vector, if requested.
r.metadata, # The metadata of the vector, if requested and present.
r.data, # The data of the vector, if requested and present.
)
```

Expand All @@ -142,6 +153,7 @@ res = index.query(
top_k=5,
include_vectors=False,
include_metadata=True,
include_data=True,
)
```

Expand Down Expand Up @@ -171,6 +183,7 @@ res = index.fetch(
ids=["id3", "id4"],
include_vectors=False,
include_metadata=True,
include_data=True,
)

# List of fetch results, one for each id passed
Expand All @@ -181,7 +194,8 @@ for r in res:
print(
r.id, # The id used while upserting the vector
r.vector, # The value of the vector, if requested.
r.medata, # The metadata of the vector, if requested and present.
r.metadata, # The metadata of the vector, if requested and present.
r.data, # The data of the vector, if requested and present.
)
```

Expand All @@ -192,14 +206,16 @@ res = index.fetch(
"id1",
include_vectors=True,
include_metadata=True,
include_data=False,
)

r = res[0]
if r: # Can be None, if there is no such vector with the given id
print(
r.id, # The id used while upserting the vector
r.vector, # The value of the vector, if requested.
r.medata, # The metadata of the vector, if requested and present.
r.metadata, # The metadata of the vector, if requested and present.
r.data, # The data of the vector, if requested and present.
)
```

Expand All @@ -225,6 +241,7 @@ res = index.range(
limit=100,
include_vectors=False,
include_metadata=True,
include_data=True,
)

while res.next_cursor != "":
Expand All @@ -233,13 +250,15 @@ while res.next_cursor != "":
limit=100,
include_vectors=False,
include_metadata=True,
include_data=True,
)

for v in res.vectors:
print(
v.id, # The id used while upserting the vector
v.vector, # The value of the vector, if requested.
v.metadata, # The metadata of the vector, if requested and present.
v.data, # The data of the vector, if requested and present.
)
```

Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from os import environ

import pytest
import pytest_asyncio

from os import environ

from tests import NAMESPACES
from upstash_vector import Index, AsyncIndex

Expand Down
101 changes: 101 additions & 0 deletions tests/core/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,56 @@ def test_fetch_single(index: Index, ns: str):
assert res[0].vector == v1_values


@pytest.mark.parametrize("ns", NAMESPACES)
def test_fetch_with_data(index: Index, ns: str):
    """Upsert three vectors and check what `fetch` returns with data included.

    The first vector carries metadata and data, the second carries neither,
    and the third carries data only. All three are fetched with vectors,
    metadata and data requested.
    """
    shared_metadata = {"metadata_field": "metadata_value"}

    # Expected fetch results as (id, vector, metadata, data), in request order.
    expected = [
        ("v1-id1", [0.1, 0.2], shared_metadata, "data1"),
        ("v1-id2", [0.3, 0.4], None, None),
        ("v1-id3", [0.5, 0.6], None, "data3"),
    ]

    # The second vector is upserted as a bare (id, vector) pair; the
    # assertions below expect None for both its metadata and its data.
    payload = [expected[0], expected[1][:2], expected[2]]
    index.upsert(vectors=payload, namespace=ns)

    fetched = index.fetch(
        ids=[row[0] for row in expected],
        include_vectors=True,
        include_metadata=True,
        include_data=True,
        namespace=ns,
    )

    for position, (vector_id, values, metadata, data) in enumerate(expected):
        result = fetched[position]
        assert result is not None
        assert result.id == vector_id

        if metadata is None:
            assert result.metadata is None
        else:
            assert result.metadata == metadata

        assert result.vector == values

        if data is None:
            assert result.data is None
        else:
            assert result.data == data


@pytest.mark.asyncio
@pytest.mark.parametrize("ns", NAMESPACES)
async def test_fetch_with_vectors_with_metadata_async(async_index: AsyncIndex, ns: str):
Expand Down Expand Up @@ -277,3 +327,54 @@ async def test_fetch_single_async(async_index: AsyncIndex, ns: str):
assert res[0].id == v1_id
assert res[0].metadata == v1_metadata
assert res[0].vector == v1_values


@pytest.mark.asyncio
@pytest.mark.parametrize("ns", NAMESPACES)
async def test_fetch_with_data_async(async_index: AsyncIndex, ns: str):
    """Async check of what `fetch` returns when data is included.

    Three vectors are upserted: one with metadata and data, one with
    neither, and one with data only. All three are then fetched with
    vectors, metadata and data requested.
    """
    shared_metadata = {"metadata_field": "metadata_value"}

    # Expected fetch results as (id, vector, metadata, data), in request order.
    expected = [
        ("v1-id1", [0.1, 0.2], shared_metadata, "data1"),
        ("v1-id2", [0.3, 0.4], None, None),
        ("v1-id3", [0.5, 0.6], None, "data3"),
    ]

    # The second vector is upserted as a bare (id, vector) pair; the
    # assertions below expect None for both its metadata and its data.
    payload = [expected[0], expected[1][:2], expected[2]]
    await async_index.upsert(vectors=payload, namespace=ns)

    fetched = await async_index.fetch(
        ids=[row[0] for row in expected],
        include_vectors=True,
        include_metadata=True,
        include_data=True,
        namespace=ns,
    )

    for position, (vector_id, values, metadata, data) in enumerate(expected):
        result = fetched[position]
        assert result is not None
        assert result.id == vector_id

        if metadata is None:
            assert result.metadata is None
        else:
            assert result.metadata == metadata

        assert result.vector == values

        if data is None:
            assert result.data is None
        else:
            assert result.data == data
Loading

0 comments on commit 5e76f4b

Please sign in to comment.