Skip to content

Commit

Permalink
up
Browse files (browse the repository at this point in the history)
  • Loading branch information
shreyaspimpalgaonkar committed Aug 26, 2024
1 parent 8316114 commit 805ce4c
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 34 deletions.
20 changes: 10 additions & 10 deletions py/cli/commands/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def ingest_files_from_urls(client, urls):
@click.pass_obj
def ingest_sample_file(client):
"""Ingest the first sample file into R2R."""
- sample_file_url = "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/aristotle.txt"
+ sample_file_url = "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt"

with timer():
response = ingest_files_from_urls(client, [sample_file_url])
Expand All @@ -127,15 +127,15 @@ def ingest_sample_file(client):
def ingest_sample_files(client):
"""Ingest multiple sample files into R2R."""
urls = [
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/aristotle.txt",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/got.txt",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/pg_essay_1.html",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/pg_essay_2.html",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/pg_essay_3.html",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/pg_essay_4.html",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/pg_essay_5.html",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/lyft_2021.pdf",
- "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/r2r/examples/data/uber_2021.pdf",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/got.txt",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_1.html",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_2.html",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_3.html",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_4.html",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_5.html",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/lyft_2021.pdf",
+ "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/uber_2021.pdf",
]
with timer():
response = ingest_files_from_urls(client, urls)
Expand Down
20 changes: 0 additions & 20 deletions py/core/configs/neo4j_kg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,6 @@ concurrent_request_limit = 256
stream = false
add_generation_kwargs = { }

- [embedding]
- provider = "openai"
- base_model = "text-embedding-3-small"
- base_dimension = 1_536
- batch_size = 256
- add_title_as_prefix = true
-
- [ingestion]
- excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ]
-
[kg]
provider = "neo4j"
batch_size = 256
Expand All @@ -35,13 +25,3 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"

[kg.kg_search_config]
model = "gpt-4o-mini"
-
- [database]
- provider = "postgres"
-
- [agent]
- system_instruction_name = "rag_agent"
- tool_names = ["search"]
-
- [agent.generation_config]
- model = "gpt-4o-mini"
1 change: 1 addition & 0 deletions py/core/main/assembly/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def create_kg_pipe(self, *args, **kwargs) -> Any:
return KGTriplesExtractionPipe(
kg_provider=self.providers.kg,
llm_provider=self.providers.llm,
+ database_provider=self.providers.database,
prompt_provider=self.providers.prompt,
chunking_provider=self.providers.chunking,
kg_batch_size=self.config.kg.batch_size,
Expand Down
8 changes: 4 additions & 4 deletions py/core/pipes/kg/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Input(AsyncPipe.Input):
def __init__(
self,
kg_provider: KGProvider,
- db_provider: DatabaseProvider,
+ database_provider: DatabaseProvider,
llm_provider: CompletionProvider,
prompt_provider: PromptProvider,
chunking_provider: ChunkingProvider,
Expand All @@ -66,7 +66,7 @@ def __init__(
)
self.kg_provider = kg_provider
self.prompt_provider = prompt_provider
- self.db_provider = db_provider
+ self.database_provider = database_provider
self.llm_provider = llm_provider
self.chunking_provider = chunking_provider
self.kg_batch_size = kg_batch_size
Expand Down Expand Up @@ -195,10 +195,10 @@ async def process_extraction(extraction):
document_ids = [extraction for extraction in input.message]

if document_ids == []:
- document_ids = [doc.id for doc in self.db_provider.get_documents_overview()]
+ document_ids = [doc.id for doc in self.database_provider.get_documents_overview()]

for document_id in document_ids:
- extractions = [DocumentFragment(**extraction) for extraction in self.db_provider.get_document_chunks(
+ extractions = [DocumentFragment(**extraction) for extraction in self.database_provider.get_document_chunks(
document_id=document_id
)]

Expand Down

0 comments on commit 805ce4c

Please sign in to comment.