Skip to content

Commit

Permalink
Feature/revive integration tests (#1343)
Browse files Browse the repository at this point in the history
* add postgres to integration

* add postgres to integration

* up

* rename

* hardcode

* add back postgres

* add back postgres

* add pgvector

* add pgvector

* add pgvector

* add pgvector

* add pgvector

* add pgvector

* add pgvector

* tweak config docs

* fix integration suite

* fix integration suite

* fix integration suite

* up

* up

* up

* up

* up

* up

* up

* up

* up

* update integration test

* final user tests

* final user tests
  • Loading branch information
emrgnt-cmplxty authored Oct 5, 2024
1 parent 067b19f commit f7b3f60
Show file tree
Hide file tree
Showing 24 changed files with 1,041 additions and 202 deletions.
186 changes: 85 additions & 101 deletions .github/workflows/integration-test-workflow-debian.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: R2R CLI Integration Test (Debian GNU/Linux 12 (bookworm) amd64)
name: R2R CLI Integration and Regression Test

on:
push:
branches:
- '**'
workflow_dispatch:
- '**' # Trigger on all branches
workflow_dispatch: # Allow manual trigger

jobs:
build-and-test:
Expand All @@ -16,29 +16,57 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
TELEMETRY_ENABLED: false
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }}
POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }}
R2R_PROJECT_NAME: ${{ secrets.R2R_PROJECT_NAME }}
POSTGRES_HOST: localhost
POSTGRES_DBNAME: postgres
POSTGRES_PORT: 5432
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
R2R_PROJECT_NAME: r2r_default

steps:
- uses: actions/checkout@v4
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
- name: Set up Python environment
uses: actions/setup-python@v4
with:
python-version: '3.x'
python-version: '3.10' # Use a stable Python version

- name: Install Poetry
- name: Install Poetry and dependencies
run: |
curl -sSL https://install.python-poetry.org | python3 -
cd py && poetry install -E core -E ingestion-bundle
- name: Install dependencies
working-directory: ./py
- name: Remove pre-installed PostgreSQL
run: |
sudo apt-get purge -y 'postgresql-*'
sudo rm -rf /var/lib/postgresql
sudo rm -rf /var/log/postgresql
sudo rm -rf /etc/postgresql
- name: Add PostgreSQL Apt Repository
run: |
# Add the PostgreSQL Apt repository
echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
# Download and add the repository GPG key
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo gpg --dearmor -o /usr/share/keyrings/postgresql-archive-keyring.gpg
- name: Install PostgreSQL 15 and pgvector
run: |
poetry install -E core -E ingestion-bundle
sudo apt-get update
sudo apt-get install -y postgresql-15 postgresql-client-15 postgresql-15-pgvector
- name: Start PostgreSQL 15 service
run: |
sudo systemctl enable postgresql@15-main
sudo systemctl start postgresql@15-main
- name: Configure PostgreSQL
run: |
# Change to a directory accessible by the postgres user to avoid permission warnings
cd /
sudo -u postgres /usr/lib/postgresql/15/bin/psql -c "ALTER USER postgres PASSWORD 'postgres';"
sudo -u postgres /usr/lib/postgresql/15/bin/psql -c "CREATE EXTENSION vector;"
- name: Start R2R server
working-directory: ./py
Expand All @@ -47,102 +75,58 @@ jobs:
echo "Waiting for services to start..."
sleep 30
- name: Run integration tests
- name: Run CLI Ingestion
working-directory: ./py
run: |
echo "R2R Version"
poetry run r2r version
poetry run python tests/integration/harness_cli.py test_ingest_sample_file_cli
poetry run python tests/integration/harness_cli.py test_document_overview_sample_file_cli
poetry run python tests/integration/harness_cli.py test_document_chunks_sample_file_cli
poetry run python tests/integration/harness_cli.py test_delete_and_reingest_sample_file_cli
- name: Walkthrough
- name: Run CLI Retrieval
working-directory: ./py
run: |
echo "Ingest Data"
poetry run r2r ingest-sample-files
echo "Get Documents Overview"
poetry run r2r documents-overview
echo "Get Document Chunks"
poetry run r2r document-chunks --document-id=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
echo "Delete Documents"
poetry run r2r delete --filter=document_id:eq:9fbe403b-c11c-5aae-8ade-ef22980c3ad1
echo "Update Document"
poetry run r2r update-files core/examples/data/aristotle_v2.txt --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
echo "Vector Search"
poetry run r2r search --query="What was Uber's profit in 2020?"
echo "Hybrid Search"
r2r search --query="What was Uber's profit in 2020?" --use-hybrid-search
echo "Basic RAG"
poetry run r2r rag --query="What was Uber's profit in 2020?"
echo "RAG with Hybrid Search"
poetry run r2r rag --query="Who is Jon Snow?" --use-hybrid-search
echo "Streaming RAG"
poetry run r2r rag --query="who was aristotle" --use-hybrid-search --stream
echo "User Registration"
curl -X POST http://localhost:7272/v2/register \
-H "Content-Type: application/json" \
-d '{
"email": "test@example.com",
"password": "password123"
}'
poetry run python tests/integration/harness_cli.py test_vector_search_sample_file_filter_cli
poetry run python tests/integration/harness_cli.py test_rag_response_sample_file_cli
poetry run python tests/integration/harness_cli.py test_rag_response_stream_sample_file_cli
echo "User Login"
curl -X POST http://localhost:7272/v2/login \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "username=test@example.com&password=password123"
echo "Users Overview"
poetry run r2r users-overview
echo "Logging"
poetry run r2r logs
echo "Analytics"
poetry run r2r analytics --filters '{"search_latencies": "search_latency"}' --analysis-types '{"search_latencies": ["basic_statistics", "search_latency"]}'
- name: GraphRAG
- name: Run SDK Ingestion
working-directory: ./py
run: |
echo "Create Knowledge Graph"
poetry run r2r create-graph --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1
echo "Inspect Knowledge Graph"
poetry run r2r inspect-knowledge-graph
echo "Graph Enrichment"
poetry run r2r enrich-graph
echo "Local Search"
r2r search --query="Who is Aristotle?" --use-kg-search --kg-search-type=local
echo "Global Search"
r2r search --query="What were Aristotles key contributions to philosophy?" --use-kg-search --kg-search-type=global --max-llm-queries-for-global-search=100
echo "RAG"
r2r rag --query="What are the key contributions of Aristotle to modern society?" --use-kg-search --kg-search-type=global --max-llm-queries-for-global-search=100
- name: Advanced RAG
poetry run python tests/integration/harness_sdk.py test_ingest_sample_file_sdk
poetry run python tests/integration/harness_sdk.py test_reingest_sample_file_sdk
poetry run python tests/integration/harness_sdk.py test_document_overview_sample_file_sdk
poetry run python tests/integration/harness_sdk.py test_document_chunks_sample_file_sdk
poetry run python tests/integration/harness_sdk.py test_delete_and_reingest_sample_file_sdk
poetry run python tests/integration/harness_sdk.py test_ingest_sample_file_with_config_sdk
- name: Run SDK Retrieval
working-directory: ./py
run: |
echo "HyDE"
poetry run r2r rag --query="who was aristotle" --use-hybrid-search --stream --search-strategy=hyde
poetry run python tests/integration/harness_sdk.py test_vector_search_sample_file_filter_sdk
poetry run python tests/integration/harness_sdk.py test_hybrid_search_sample_file_filter_sdk
poetry run python tests/integration/harness_sdk.py test_rag_response_sample_file_sdk
echo "Rag-Fusion"
r2r rag --query="Explain the theory of relativity" --use-hybrid-search --stream --search-strategy=rag_fusion
- name: Run SDK Auth
working-directory: ./py
run: |
poetry run python tests/integration/harness_sdk.py test_user_registration_and_login
poetry run python tests/integration/harness_sdk.py test_duplicate_user_registration
poetry run python tests/integration/harness_sdk.py test_token_refresh
poetry run python tests/integration/harness_sdk.py test_user_document_management
poetry run python tests/integration/harness_sdk.py test_user_search_and_rag
poetry run python tests/integration/harness_sdk.py test_user_password_management
poetry run python tests/integration/harness_sdk.py test_user_profile_management
poetry run python tests/integration/harness_sdk.py test_user_logout
- name: Stop R2R server
if: always()
run: ps aux | grep "r2r serve" | awk '{print $2}' | xargs kill || true

- name: Uninstall PostgreSQL after tests (Optional)
if: always()
run: |
pkill -f "r2r serve"
sudo apt-get purge -y 'postgresql-*'
sudo rm -rf /var/lib/postgresql
sudo rm -rf /var/log/postgresql
sudo rm -rf /etc/postgresql
2 changes: 1 addition & 1 deletion docs/api-reference/openapi.json

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/cookbooks/walkthrough.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ concurrent_request_limit = 16
model = "openai/gpt-4o"
temperature = 0.5

[chunking]
[ingestion]
provider = "r2r"
chunking_strategy = "recursive"
chunk_size = 512
chunk_overlap = 256
chunk_size = 1_024
chunk_overlap = 512
excluded_parsers = ["mp4"]
```

Expand Down
2 changes: 1 addition & 1 deletion docs/documentation/installation/light/docker.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pip install r2r
<Accordion title="Local LLMs" icon="house" defaultOpen={false}>
To start R2R with your local computer as the default LLM inference provider:
```bash
r2r serve --docker --config-name=light_local_llm
r2r serve --docker --config-name=local_llm
```
Then, in a separate terminal you will need to run Ollama to provide completions:
```bash
Expand Down
1 change: 1 addition & 0 deletions py/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@
"PipeType",
## PROVIDERS
# Base provider classes
"AppConfig",
"Provider",
"ProviderConfig",
# Auth provider
Expand Down
1 change: 1 addition & 0 deletions py/core/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
"PipeType",
## PROVIDERS
# Base provider classes
"AppConfig",
"Provider",
"ProviderConfig",
# Auth provider
Expand Down
2 changes: 1 addition & 1 deletion py/core/base/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@
RAGAgentResponse,
RAGResponse,
SearchResponse,
WrappedCompletionResponse,
WrappedRAGAgentResponse,
WrappedRAGResponse,
WrappedSearchResponse,
WrappedCompletionResponse,
)

__all__ = [
Expand Down
4 changes: 2 additions & 2 deletions py/core/base/providers/kg.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def get_entities(
limit: int,
entity_ids: list[str] | None = None,
with_description: bool = False,
) -> list[Entity]:
) -> dict:
"""Abstract method to get entities."""
pass

Expand All @@ -100,7 +100,7 @@ async def get_triples(
offset: int,
limit: int,
triple_ids: list[str] | None = None,
) -> list[Triple]:
) -> dict:
"""Abstract method to get triples."""
pass

Expand Down
2 changes: 1 addition & 1 deletion py/core/configs/r2r_aws_bedrock.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require_email_verification = false
default_admin_email = "admin@example.com"
default_admin_password = "change_me_immediately"

[chunking]
[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
Expand Down
18 changes: 9 additions & 9 deletions py/core/main/api/auth_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ async def get_user_app(
@self.router.put("/user", response_model=WrappedUserResponse)
@self.base_endpoint
async def put_user_app(
user_id: str = Body(None, description="ID of the user to update"),
user_id: UUID = Body(None, description="ID of the user to update"),
email: EmailStr | None = Body(
None, description="Updated email address"
),
Expand All @@ -128,21 +128,21 @@ async def put_user_app(
This endpoint allows the authenticated user to update their profile information.
"""

if is_superuser is not None and not auth_user.is_superuser:
raise R2RException(
"Only superusers can update the superuser status of a user",
403,
)

try:
user_uuid = UUID(user_id)
except ValueError:
raise R2RException(
status_code=400, message="Invalid user ID format."
)
if not auth_user.is_superuser:
if not auth_user.id == user_id:
raise R2RException(
"Only superusers can update other users' information",
403,
)

return await self.service.update_user(
user_id=user_uuid,
user_id=user_id,
email=email,
is_superuser=is_superuser,
name=name,
Expand Down
Loading

0 comments on commit f7b3f60

Please sign in to comment.