update type checks (#1331)

SciPhi-AI · Oct 3, 2024 · e7c42ac · e7c42ac
1 parent 4d3bb3a
commit e7c42ac
Show file tree

Hide file tree

Showing 36 changed files with 1,456 additions and 2,714 deletions.
diff --git a/.github/workflows/integration-test-workflow-debian.yml b/.github/workflows/integration-test-workflow-debian.yml
@@ -81,7 +81,7 @@ jobs:
         poetry run r2r rag --query="What was Uber's profit in 2020?"
 
         echo "RAG with Hybrid Search"
-        poetry run r2r rag --query="Who is John Snow?" --use-hybrid-search
+        poetry run r2r rag --query="Who is Jon Snow?" --use-hybrid-search
 
         echo "Streaming RAG"
         poetry run r2r rag --query="who was aristotle" --use-hybrid-search --stream

diff --git a/docs/cookbooks/walkthrough.mdx b/docs/cookbooks/walkthrough.mdx
@@ -773,21 +773,21 @@ R2R also supports hybrid search in RAG, combining the power of vector search and
 <Tabs>
 <Tab title="CLI">
 ```bash
-r2r rag --query="Who is John Snow?" --use-hybrid-search
+r2r rag --query="Who is Jon Snow?" --use-hybrid-search
 ```
 </Tab>
 
 
 <Tab title="Python">
 ```python
-results = client.rag("Who is John Snow?", {"use_hybrid_search": True})
+results = client.rag("Who is Jon Snow?", {"use_hybrid_search": True})
 ```
 </Tab>
 
 <Tab title="JavaScript">
 ```javascript
 await client.rag({
-  query: "Who is John Snow?",
+  query: "Who is Jon Snow?",
 });
 ```
 </Tab>
@@ -797,7 +797,7 @@ await client.rag({
 curl -X POST http://localhost:7272/v2/rag \
   -H "Content-Type: application/json" \
   -d '{
-    "query": "Who is John Snow?",
+    "query": "Who is Jon Snow?",
     "vector_search_settings": {
       "use_vector_search": true,
       "search_filters": {},
@@ -823,7 +823,7 @@ curl -X POST http://localhost:7272/v2/rag \
                 index=0,
                 logprobs=None,
                 message=ChatCompletionMessage(
-                    content="John Snow is mentioned in the context as one of Samwell (Sam) Tarly's closest companions at the Wall [5], [6].",
+                    content="Jon Snow is mentioned in the context as one of Samwell (Sam) Tarly's closest companions at the Wall [5], [6].",
                     role='assistant',
                     function_call=None,
                     tool_calls=None)
@@ -946,7 +946,7 @@ await client.rag({
 curl -X POST http://localhost:7272/v2/rag \
     -H "Content-Type: application/json" \
     -d '{
-        "query": "Who is John Snow?",
+        "query": "Who is Jon Snow?",
         "rag_generation_config": {
             "model": "claude-3-haiku-20240307",
             "temperature": 0.7

diff --git a/docs/documentation/js-sdk/ingestion.mdx b/docs/documentation/js-sdk/ingestion.mdx
@@ -52,7 +52,7 @@ const ingestResponse = await client.ingestFiles(files, {
 </ParamField>
 
 <ParamField path="ingestion_config" type="Optional[Union[dict, ChunkingConfig]]">
-  The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime.
+  The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime.
   <Expandable title="properties">
     <ParamField path="provider" type="str" default="unstructured_local">
       Which chunking provider to use. Options are "r2r", "unstructured_local", or "unstructured_api".
@@ -220,7 +220,7 @@ const updateResponse = await client.updateFiles(files, {
   </ParamField>
 
   <ParamField path="ingestion_config" type="Record<string, any>">
-    The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime.
+    The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime.
     <Expandable title="properties">
     <ParamField path="provider" type="str" default="r2r">
     Which chunking provider to use, `r2r` or `unstructured`. Selecting `unstructured` is generally recommended when parsing with `unstructured` or `unstructured_api`.

diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx
@@ -418,7 +418,7 @@ Use hybrid search in RAG:
 
 ```javascript
 const hybridRagResponse = await client.rag({
-  query: "Who is John Snow?",
+  query: "Who is Jon Snow?",
   use_hybrid_search: true
 });
 ```

diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx
@@ -57,7 +57,7 @@ ingest_response = client.ingest_files(
 </ParamField>
 
 <ParamField path="ingestion_config" type="Optional[Union[dict, ChunkingConfig]]">
-  The chunking config override parameter enables developers to customize their R2R chunking strategy at runtime.
+  The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime.
   <Expandable title="properties">
     <ParamField path="provider" type="str" default="unstructured_local">
       Which chunking provider to use. Options are "r2r", "unstructured_local", or "unstructured_api".

diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx
@@ -469,7 +469,7 @@ Learn more about the RAG [API here](/api-reference/endpoint/rag). It allows perf
 
 ```python
 hybrid_rag_response = client.rag(
-    "Who is John Snow?",
+    "Who is Jon Snow?",
     vector_search_settings={"use_hybrid_search": True}
 )
 ```

diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py
@@ -52,8 +52,6 @@
     "KGCreationSettings",
     "KGEnrichmentSettings",
     "KGRunType",
-    "KGCreationEstimationResponse",
-    "KGEnrichmentEstimationResponse",
     # User abstractions
     "Token",
     "TokenData",

diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py
@@ -29,9 +29,7 @@
     Triple,
 )
 from shared.abstractions.kg import (
-    KGCreationEstimationResponse,
     KGCreationSettings,
-    KGEnrichmentEstimationResponse,
     KGEnrichmentSettings,
     KGRunType,
 )
@@ -122,8 +120,6 @@
     "KGCreationSettings",
     "KGEnrichmentSettings",
     "KGRunType",
-    "KGCreationEstimationResponse",
-    "KGEnrichmentEstimationResponse",
     # User abstractions
     "Token",
     "TokenData",

diff --git a/py/core/base/api/models/__init__.py b/py/core/base/api/models/__init__.py
@@ -36,6 +36,7 @@
     WrappedCollectionListResponse,
     WrappedCollectionOverviewResponse,
     WrappedCollectionResponse,
+    WrappedDeleteResponse,
     WrappedDocumentChunkResponse,
     WrappedDocumentOverviewResponse,
     WrappedGetPromptsResponse,
@@ -107,6 +108,7 @@
     "WrappedUserCollectionResponse",
     "WrappedDocumentChunkResponse",
     "WrappedCollectionOverviewResponse",
+    "WrappedDeleteResponse",
     # Retrieval Responses
     "SearchResponse",
     "RAGResponse",

diff --git a/py/core/base/providers/kg.py b/py/core/base/providers/kg.py
@@ -84,6 +84,9 @@ async def add_kg_extractions(
     @abstractmethod
     async def get_entities(
         self,
+        collection_id: UUID,
+        offset: int,
+        limit: int,
         entity_ids: list[str] | None = None,
         with_description: bool = False,
     ) -> list[Entity]:
@@ -92,7 +95,11 @@ async def get_entities(
 
     @abstractmethod
     async def get_triples(
-        self, triple_ids: list[str] | None = None
+        self,
+        collection_id: UUID,
+        offset: int,
+        limit: int,
+        triple_ids: list[str] | None = None,
     ) -> list[Triple]:
         """Abstract method to get triples."""
         pass
@@ -155,7 +162,7 @@ async def perform_graph_clustering(
         self,
         collection_id: UUID,
         leiden_params: dict,  # TODO - Add typing for leiden_params
-    ) -> Tuple[int, int, set[tuple[int, Any]]]:
+    ) -> int:
         """Abstract method to perform graph clustering."""
         pass
 
@@ -172,12 +179,18 @@ async def get_community_details(self, community_number: int):
         pass
 
     @abstractmethod
-    async def get_entity_count(self, document_id: UUID) -> int:
+    async def get_entity_count(
+        self,
+        collection_id: Optional[UUID] = None,
+        document_id: Optional[UUID] = None,
+    ) -> int:
         """Abstract method to get the entity count."""
         pass
 
     @abstractmethod
-    async def delete_graph_for_collection(self, collection_id: UUID) -> None:
+    async def delete_graph_for_collection(
+        self, collection_id: UUID, cascade: bool
+    ) -> None:
         """Abstract method to delete the graph for a collection."""
         pass
 

diff --git a/py/core/base/providers/orchestration.py b/py/core/base/providers/orchestration.py
@@ -50,7 +50,9 @@ def failure(self, *args, **kwargs) -> Any:
         pass
 
     @abstractmethod
-    def register_workflows(self, workflow: Workflow, service: Any) -> None:
+    def register_workflows(
+        self, workflow: Workflow, service: Any, messages: dict
+    ) -> None:
         pass
 
     @abstractmethod

diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py
@@ -320,7 +320,9 @@ def _validate_ingestion_config(ingestion_config):
         if ingestion_config:
             R2RProviderFactory.create_ingestion_provider(ingestion_config)
         else:
-            logger.info("No chunking config override provided. Using default.")
+            logger.info(
+                "No ingestion config override provided. Using default."
+            )
 
     @staticmethod
     async def _process_files(files):

diff --git a/py/core/main/api/kg_router.py b/py/core/main/api/kg_router.py
@@ -15,6 +15,10 @@
 from core.base.providers import OrchestrationProvider, Workflow
 from core.utils import generate_default_user_collection_id
 from shared.abstractions.kg import KGRunType
+from shared.api.models.kg.responses import (
+    KGCreationEstimationResponse,
+    KGEnrichmentEstimationResponse,
+)
 
 from ..services.kg_service import KgService
 from .base_router import BaseRouter
@@ -69,8 +73,7 @@ async def create_graph(
                 description="Settings for the graph creation process.",
             ),
             auth_user=Depends(self.service.providers.auth.auth_wrapper),
-            response_model=WrappedKGCreationResponse,
-        ):
+        ) -> WrappedKGCreationResponse:
             """
             Creates a graph on your documents. This endpoint takes as input a list of document IDs and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.
             This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.
@@ -107,7 +110,7 @@ async def create_graph(
                 "user": auth_user.json(),
             }
 
-            return await self.orchestration_provider.run_workflow(
+            return await self.orchestration_provider.run_workflow(  # type: ignore
                 "create-graph", {"request": workflow_input}, {}
             )
 
@@ -129,8 +132,7 @@ async def enrich_graph(
                 description="Settings for the graph enrichment process.",
             ),
             auth_user=Depends(self.service.providers.auth.auth_wrapper),
-            response_model=WrappedKGEnrichmentResponse,
-        ):
+        ) -> WrappedKGEnrichmentResponse:
             """
             This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work.
             """
@@ -164,7 +166,7 @@ async def enrich_graph(
                 "user": auth_user.json(),
             }
 
-            return await self.orchestration_provider.run_workflow(
+            return await self.orchestration_provider.run_workflow(  # type: ignore
                 "enrich-graph", {"request": workflow_input}, {}
             )