diff --git a/src/curate_gpt/cli.py b/src/curate_gpt/cli.py index 9b36a78..56bfc90 100644 --- a/src/curate_gpt/cli.py +++ b/src/curate_gpt/cli.py @@ -415,7 +415,7 @@ def search(query, path, collection, show_documents, database_type, **kwargs): db = get_store(database_type, path) results = db.search(query, collection=collection, **kwargs) i = 0 - for obj, distance, meta in results: + for obj, distance, _meta in results: i += 1 print(f"## {i} DISTANCE: {distance}") print(yaml.dump(obj, sort_keys=False)) @@ -537,7 +537,7 @@ def matches(id, path, collection, database_type): print(obj) results = db.matches(obj, collection=collection) i = 0 - for obj, distance, meta in results: + for obj, distance, _meta in results: i += 1 print(f"## ID:- {obj['id']}") print(f"## DISTANCE- {distance}") diff --git a/src/curate_gpt/store/chromadb_adapter.py b/src/curate_gpt/store/chromadb_adapter.py index 6b0174f..bb84aef 100644 --- a/src/curate_gpt/store/chromadb_adapter.py +++ b/src/curate_gpt/store/chromadb_adapter.py @@ -18,10 +18,10 @@ from linkml_runtime.utils.yamlutils import YAMLRoot from oaklib.utilities.iterator_utils import chunk from pydantic import BaseModel -from curate_gpt.store.metadata import CollectionMetadata -from curate_gpt.store.vocab import OBJECT, QUERY, PROJECTION, SEARCH_RESULT from curate_gpt.store.db_adapter import DBAdapter +from curate_gpt.store.metadata import CollectionMetadata +from curate_gpt.store.vocab import OBJECT, PROJECTION, QUERY, SEARCH_RESULT from curate_gpt.utils.vector_algorithms import mmr_diversified_search logger = logging.getLogger(__name__) diff --git a/src/curate_gpt/store/db_adapter.py b/src/curate_gpt/store/db_adapter.py index d06c527..9f9d25c 100644 --- a/src/curate_gpt/store/db_adapter.py +++ b/src/curate_gpt/store/db_adapter.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path -from typing import ClassVar, Dict, Iterable, Iterator, List, Optional, TextIO, Tuple, Union +from typing import ClassVar, Dict, Iterable, Iterator, List, Optional, TextIO, Union import pandas as pd import yaml @@ -15,15 +15,15 @@ from curate_gpt.store.metadata import CollectionMetadata from curate_gpt.store.schema_proxy import SchemaProxy from curate_gpt.store.vocab import ( - OBJECT, - SEARCH_RESULT, - QUERY, - PROJECTION, - FILE_LIKE, - EMBEDDINGS, + DEFAULT_COLLECTION, DOCUMENTS, + EMBEDDINGS, + FILE_LIKE, METADATAS, - DEFAULT_COLLECTION, + OBJECT, + PROJECTION, + QUERY, + SEARCH_RESULT, ) logger = logging.getLogger(__name__) diff --git a/src/curate_gpt/store/db_metadata.py b/src/curate_gpt/store/db_metadata.py index d68d19f..992b266 100644 --- a/src/curate_gpt/store/db_metadata.py +++ b/src/curate_gpt/store/db_metadata.py @@ -2,6 +2,7 @@ from pydantic import BaseModel, ConfigDict import yaml +from pydantic import BaseModel class DBSettings(BaseModel): @@ -19,7 +20,7 @@ class DBSettings(BaseModel): ef_construction: int = 128 """ - Construction parameter for hnsw index. + Construction parameter for hnsw index. Higher values are more accurate but slower. """ diff --git a/src/curate_gpt/store/duckdb_adapter.py b/src/curate_gpt/store/duckdb_adapter.py index a0b96f7..e8cd4f8 100644 --- a/src/curate_gpt/store/duckdb_adapter.py +++ b/src/curate_gpt/store/duckdb_adapter.py @@ -9,7 +9,6 @@ import re import time from dataclasses import dataclass, field -from pathlib import Path from typing import Any, Callable, ClassVar, Dict, Iterable, Iterator, List, Mapping, Optional, Union import duckdb @@ -27,21 +26,21 @@ from curate_gpt.store.db_adapter import DBAdapter from curate_gpt.store.duckdb_result import DuckDBSearchResult from curate_gpt.store.metadata import CollectionMetadata -from curate_gpt.utils.vector_algorithms import mmr_diversified_search from curate_gpt.store.vocab import ( - OBJECT, - QUERY, - PROJECTION, - EMBEDDINGS, + DISTANCES, DOCUMENTS, + EMBEDDINGS, + IDS, METADATAS, MODEL_DIMENSIONS, MODELS, + OBJECT, OPENAI_MODEL_DIMENSIONS, - IDS, + PROJECTION, + QUERY, SEARCH_RESULT, - DISTANCES, ) +from curate_gpt.utils.vector_algorithms import mmr_diversified_search logger = logging.getLogger(__name__) diff --git a/src/curate_gpt/store/duckdb_result.py b/src/curate_gpt/store/duckdb_result.py index 91750e2..64b79b6 100644 --- a/src/curate_gpt/store/duckdb_result.py +++ b/src/curate_gpt/store/duckdb_result.py @@ -1,8 +1,5 @@ -import json -from typing import Any, Dict, List, Optional, Set, Iterator, Tuple +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple -import jsonlines -import yaml from pydantic import BaseModel, ConfigDict SEARCH_RESULT = Tuple[Dict[str, Any], Dict, float, Optional[Dict]] diff --git a/src/curate_gpt/store/vocab.py b/src/curate_gpt/store/vocab.py index e835a92..9e0d665 100644 --- a/src/curate_gpt/store/vocab.py +++ b/src/curate_gpt/store/vocab.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Union, Dict, List, Tuple, Optional, TextIO +from typing import Dict, List, Optional, TextIO, Tuple, Union from linkml_runtime.utils.yamlutils import YAMLRoot from pydantic import BaseModel diff --git a/src/curate_gpt/wrappers/general/github_wrapper.py b/src/curate_gpt/wrappers/general/github_wrapper.py index 7f8191c..12c4a6f 100644 --- a/src/curate_gpt/wrappers/general/github_wrapper.py +++ b/src/curate_gpt/wrappers/general/github_wrapper.py @@ -1,4 +1,4 @@ -"""Chat with a Google Drive.""" +"""Chat with issues from a GitHub repository.""" import logging import os @@ -193,6 +193,10 @@ def issue_comments(self, issue_number: str) -> Iterator[Dict]: url = response.links.get("next", {}).get("url") def transform_issue(self, obj: Dict) -> Issue: + + if not obj.get("body"): + obj["body"] = "" + issue = Issue( id=obj.get("url"), number=obj.get("number"), diff --git a/tests/agents/conftest.py b/tests/agents/conftest.py index e1b84c6..f395107 100644 --- a/tests/agents/conftest.py +++ b/tests/agents/conftest.py @@ -1,8 +1,8 @@ import pytest + from curate_gpt import ChromaDBAdapter from curate_gpt.store import SchemaProxy from curate_gpt.wrappers.ontology import ONTOLOGY_MODEL_PATH - from tests import INPUT_DBS diff --git a/tests/agents/test_chat.py b/tests/agents/test_chat.py index 10ee339..1001ae5 100644 --- a/tests/agents/test_chat.py +++ b/tests/agents/test_chat.py @@ -1,6 +1,7 @@ import re import pytest + from curate_gpt.agents.chat_agent import ChatAgent from curate_gpt.extract import BasicExtractor diff --git a/tests/agents/test_concept_recognizer.py b/tests/agents/test_concept_recognizer.py index eb47eca..fc7d9c2 100644 --- a/tests/agents/test_concept_recognizer.py +++ b/tests/agents/test_concept_recognizer.py @@ -1,5 +1,6 @@ import pytest import yaml + from curate_gpt.agents.concept_recognition_agent import AnnotationMethod, ConceptRecognitionAgent from curate_gpt.extract.basic_extractor import BasicExtractor diff --git a/tests/agents/test_dase.py b/tests/agents/test_dase.py index 9de656e..dd76af7 100644 --- a/tests/agents/test_dase.py +++ b/tests/agents/test_dase.py @@ -1,5 +1,6 @@ import pytest import yaml + from curate_gpt.agents.dase_agent import DatabaseAugmentedStructuredExtraction from curate_gpt.agents.dragon_agent import DragonAgent from curate_gpt.extract.basic_extractor import BasicExtractor diff --git a/tests/agents/test_dragon.py b/tests/agents/test_dragon.py index e3a5c10..5fc0b81 100644 --- a/tests/agents/test_dragon.py +++ b/tests/agents/test_dragon.py @@ -1,5 +1,6 @@ import pytest import yaml + from curate_gpt.agents.dragon_agent import DragonAgent from curate_gpt.extract.basic_extractor import BasicExtractor diff --git a/tests/agents/test_mapper.py b/tests/agents/test_mapper.py index 748a42d..d91334f 100644 --- a/tests/agents/test_mapper.py +++ b/tests/agents/test_mapper.py @@ -1,4 +1,5 @@ import pytest + from curate_gpt.agents import MappingAgent from curate_gpt.agents.mapping_agent import MappingPredicate from curate_gpt.extract import BasicExtractor diff --git a/tests/cli/test_chat_cli.py b/tests/cli/test_chat_cli.py index c9a75fa..d808409 100644 --- a/tests/cli/test_chat_cli.py +++ b/tests/cli/test_chat_cli.py @@ -1,5 +1,4 @@ from curate_gpt.cli import main - from tests import INPUT_DIR ONT_DB = str(INPUT_DIR / "go-nucleus.db") diff --git a/tests/cli/test_store_cli.py b/tests/cli/test_store_cli.py index 305ee5a..c3c522a 100644 --- a/tests/cli/test_store_cli.py +++ b/tests/cli/test_store_cli.py @@ -1,5 +1,4 @@ from curate_gpt.cli import main - from tests import INPUT_DIR ONT_DB = str(INPUT_DIR / "go-nucleus.db") diff --git a/tests/evaluation/conftest.py b/tests/evaluation/conftest.py index 7fc3840..c3724dc 100644 --- a/tests/evaluation/conftest.py +++ b/tests/evaluation/conftest.py @@ -1,9 +1,9 @@ import pytest +from oaklib import get_adapter + from curate_gpt import ChromaDBAdapter from curate_gpt.store import SchemaProxy from curate_gpt.wrappers.ontology import ONTOLOGY_MODEL_PATH, OntologyWrapper -from oaklib import get_adapter - from tests import INPUT_DBS, INPUT_DIR diff --git a/tests/evaluation/test_calculate_statistics.py b/tests/evaluation/test_calculate_statistics.py index e566264..9aefff8 100644 --- a/tests/evaluation/test_calculate_statistics.py +++ b/tests/evaluation/test_calculate_statistics.py @@ -1,4 +1,5 @@ import pytest + from curate_gpt.evaluation.calc_statistics import ( aggregate_metrics, calculate_metrics, diff --git a/tests/evaluation/test_runner.py b/tests/evaluation/test_runner.py index 202cc53..d0bbd64 100644 --- a/tests/evaluation/test_runner.py +++ b/tests/evaluation/test_runner.py @@ -1,8 +1,8 @@ import pytest import yaml + from curate_gpt.evaluation.evaluation_datamodel import Task from curate_gpt.evaluation.runner import run_task - from tests import OUTPUT_DIR diff --git a/tests/extract/test_extractor.py b/tests/extract/test_extractor.py index 452741d..1e59c59 100644 --- a/tests/extract/test_extractor.py +++ b/tests/extract/test_extractor.py @@ -1,13 +1,14 @@ from typing import List import pytest +from linkml_runtime.utils.schema_builder import SchemaBuilder +from pydantic import BaseModel, ConfigDict + from curate_gpt.extract.basic_extractor import BasicExtractor from curate_gpt.extract.extractor import AnnotatedObject from curate_gpt.extract.openai_extractor import OpenAIExtractor from curate_gpt.extract.recursive_extractor import RecursiveExtractor from curate_gpt.store.schema_proxy import SchemaProxy -from linkml_runtime.utils.schema_builder import SchemaBuilder -from pydantic import BaseModel, ConfigDict class Occupation(BaseModel): diff --git a/tests/store/test_chromadb_adapter.py b/tests/store/test_chromadb_adapter.py index 20232a7..50b2f03 100644 --- a/tests/store/test_chromadb_adapter.py +++ b/tests/store/test_chromadb_adapter.py @@ -1,18 +1,14 @@ -import json import shutil -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, Iterator +from typing import Dict import pytest import yaml -from curate_gpt.store.chromadb_adapter import ChromaDBAdapter -from curate_gpt.store.schema_proxy import SchemaProxy -from curate_gpt.wrappers.general.json_wrapper import JSONWrapper -from curate_gpt.wrappers.ontology import ONTOLOGY_MODEL_PATH, OntologyWrapper from linkml_runtime.utils.schema_builder import SchemaBuilder from oaklib import get_adapter +from curate_gpt.store.chromadb_adapter import ChromaDBAdapter +from curate_gpt.store.schema_proxy import SchemaProxy +from curate_gpt.wrappers.ontology import ONTOLOGY_MODEL_PATH, OntologyWrapper from tests import INPUT_DBS, INPUT_DIR, OUTPUT_CHROMA_DB_PATH, OUTPUT_DIR EMPTY_DB_PATH = OUTPUT_DIR / "empty_db" diff --git a/tests/store/test_duckdb_adapter.py b/tests/store/test_duckdb_adapter.py index bbc6443..2e954a9 100644 --- a/tests/store/test_duckdb_adapter.py +++ b/tests/store/test_duckdb_adapter.py @@ -1,21 +1,15 @@ -import itertools import os import shutil -import time -from dataclasses import dataclass -from pathlib import Path from typing import Dict import pytest import yaml +from linkml_runtime.utils.schema_builder import SchemaBuilder +from oaklib import get_adapter -from curate_gpt.store import CollectionMetadata from curate_gpt.store.duckdb_adapter import DuckDBAdapter from curate_gpt.store.schema_proxy import SchemaProxy from curate_gpt.wrappers.ontology import OntologyWrapper -from linkml_runtime.utils.schema_builder import SchemaBuilder -from oaklib import get_adapter - from tests import INPUT_DBS, INPUT_DIR, OUTPUT_DIR, OUTPUT_DUCKDB_PATH EMPTY_DB_PATH = os.path.join(OUTPUT_DIR, "empty_duckdb") @@ -165,7 +159,7 @@ def test_ontology_matches(ontology_db): first_obj = results[0][0] print("the id", first_obj["id"]) - first_meta = results[0][2] + # first_meta = results[0][2] new_id, new_definition = "Palm Beach", "A beach with palm trees" updated_obj = { "id": new_id, diff --git a/tests/store/test_in_memory_adapter.py b/tests/store/test_in_memory_adapter.py index cab48ac..bd66ee9 100644 --- a/tests/store/test_in_memory_adapter.py +++ b/tests/store/test_in_memory_adapter.py @@ -1,11 +1,11 @@ from typing import Dict import pytest +from linkml_runtime.utils.schema_builder import SchemaBuilder + from curate_gpt import DBAdapter from curate_gpt.store import get_store from curate_gpt.store.schema_proxy import SchemaProxy -from linkml_runtime.utils.schema_builder import SchemaBuilder - from tests import OUTPUT_DIR EMPTY_DB_PATH = OUTPUT_DIR / "empty_db" diff --git a/tests/utils/test_search.py b/tests/utils/test_search.py index c3711d7..5b17539 100644 --- a/tests/utils/test_search.py +++ b/tests/utils/test_search.py @@ -1,5 +1,6 @@ import numpy as np import pytest + from curate_gpt.utils.vector_algorithms import mmr_diversified_search vectors = np.array( diff --git a/tests/wrappers/test_bioportal.py b/tests/wrappers/test_bioportal.py index a53780d..c9f9a0e 100644 --- a/tests/wrappers/test_bioportal.py +++ b/tests/wrappers/test_bioportal.py @@ -1,11 +1,11 @@ import logging import pytest + from curate_gpt import ChromaDBAdapter from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.ontology.bioportal_wrapper import BioportalWrapper from curate_gpt.wrappers.ontology.ontology_wrapper import OntologyWrapper - from tests import OUTPUT_DIR TEMP_OAKVIEW_DB = OUTPUT_DIR / "bioportal_tmp" diff --git a/tests/wrappers/test_clinvar.py b/tests/wrappers/test_clinvar.py index a8b580d..f58c735 100644 --- a/tests/wrappers/test_clinvar.py +++ b/tests/wrappers/test_clinvar.py @@ -4,12 +4,12 @@ import pytest import yaml + from curate_gpt import ChromaDBAdapter from curate_gpt.agents.chat_agent import ChatAgent from curate_gpt.agents.dragon_agent import DragonAgent from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.clinical.clinvar_wrapper import ClinVarWrapper - from tests import INPUT_DIR, OUTPUT_DIR TEMP_DB = OUTPUT_DIR / "obj_tmp" diff --git a/tests/wrappers/test_evidence_agent.py b/tests/wrappers/test_evidence_agent.py index efaa3bc..a8bd07a 100644 --- a/tests/wrappers/test_evidence_agent.py +++ b/tests/wrappers/test_evidence_agent.py @@ -4,12 +4,12 @@ import pytest import yaml + from curate_gpt import ChromaDBAdapter from curate_gpt.agents.evidence_agent import EvidenceAgent from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers import BaseWrapper from curate_gpt.wrappers.literature import PubmedWrapper, WikipediaWrapper - from tests import OUTPUT_DIR TEMP_PUBMED_DB = OUTPUT_DIR / "pmid_tmp" diff --git a/tests/wrappers/test_gocam.py b/tests/wrappers/test_gocam.py index e146edc..1768554 100644 --- a/tests/wrappers/test_gocam.py +++ b/tests/wrappers/test_gocam.py @@ -2,8 +2,8 @@ import pytest import yaml -from curate_gpt.wrappers.bio.gocam_wrapper import GOCAMWrapper +from curate_gpt.wrappers.bio.gocam_wrapper import GOCAMWrapper from tests import INPUT_DIR diff --git a/tests/wrappers/test_hpoa.py b/tests/wrappers/test_hpoa.py index 99ee085..3ea5da8 100644 --- a/tests/wrappers/test_hpoa.py +++ b/tests/wrappers/test_hpoa.py @@ -2,8 +2,8 @@ import pytest import yaml -from curate_gpt.wrappers.clinical.hpoa_wrapper import HPOAWrapper +from curate_gpt.wrappers.clinical.hpoa_wrapper import HPOAWrapper from tests import INPUT_DIR, OUTPUT_DIR TEMP_DB = OUTPUT_DIR / "obj_tmp" diff --git a/tests/wrappers/test_json.py b/tests/wrappers/test_json.py index 9249db4..df0b0d0 100644 --- a/tests/wrappers/test_json.py +++ b/tests/wrappers/test_json.py @@ -1,5 +1,4 @@ from curate_gpt.wrappers import get_wrapper - from tests import INPUT_DIR diff --git a/tests/wrappers/test_linkml_schema.py b/tests/wrappers/test_linkml_schema.py index acff9a7..e8502f0 100644 --- a/tests/wrappers/test_linkml_schema.py +++ b/tests/wrappers/test_linkml_schema.py @@ -1,5 +1,4 @@ from curate_gpt.wrappers import get_wrapper - from tests import INPUT_DIR diff --git a/tests/wrappers/test_ncbi_biosample.py b/tests/wrappers/test_ncbi_biosample.py index c7b8d31..1aaea1d 100644 --- a/tests/wrappers/test_ncbi_biosample.py +++ b/tests/wrappers/test_ncbi_biosample.py @@ -3,11 +3,11 @@ import time import yaml + from curate_gpt import ChromaDBAdapter from curate_gpt.agents.chat_agent import ChatAgent from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.investigation.ncbi_biosample_wrapper import NCBIBiosampleWrapper - from tests import OUTPUT_DIR TEMP_BIOSAMPLE_DB = OUTPUT_DIR / "biosample_tmp" diff --git a/tests/wrappers/test_ontology.py b/tests/wrappers/test_ontology.py index 6fe0c65..96aba28 100644 --- a/tests/wrappers/test_ontology.py +++ b/tests/wrappers/test_ontology.py @@ -2,11 +2,11 @@ import shutil import pytest +from oaklib import get_adapter + from curate_gpt import ChromaDBAdapter from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.ontology.ontology_wrapper import OntologyWrapper -from oaklib import get_adapter - from tests import INPUT_DIR, OUTPUT_DIR TEMP_OAKVIEW_DB = OUTPUT_DIR / "oaktmp" diff --git a/tests/wrappers/test_pmc.py b/tests/wrappers/test_pmc.py index c174dd2..4844e9f 100644 --- a/tests/wrappers/test_pmc.py +++ b/tests/wrappers/test_pmc.py @@ -4,12 +4,12 @@ import pytest import yaml + from curate_gpt import ChromaDBAdapter from curate_gpt.agents.chat_agent import ChatAgent from curate_gpt.agents.dragon_agent import DragonAgent from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.literature.pmc_wrapper import PMCWrapper - from tests import INPUT_DIR, OUTPUT_DIR TEMP_DB = OUTPUT_DIR / "obj_tmp" diff --git a/tests/wrappers/test_pubmed.py b/tests/wrappers/test_pubmed.py index 1f0d155..64538e9 100644 --- a/tests/wrappers/test_pubmed.py +++ b/tests/wrappers/test_pubmed.py @@ -6,7 +6,6 @@ from curate_gpt.agents.chat_agent import ChatAgent from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.literature import PubmedWrapper - from tests import OUTPUT_DIR TEMP_PUBMED_DB = OUTPUT_DIR / "pmid_tmp" diff --git a/tests/wrappers/test_wikipedia.py b/tests/wrappers/test_wikipedia.py index 5e2d372..bf9526a 100644 --- a/tests/wrappers/test_wikipedia.py +++ b/tests/wrappers/test_wikipedia.py @@ -3,10 +3,10 @@ import time import yaml + from curate_gpt import ChromaDBAdapter from curate_gpt.extract import BasicExtractor from curate_gpt.wrappers.literature import WikipediaWrapper - from tests import OUTPUT_DIR TEMP_Wikipedia_DB = OUTPUT_DIR / "wp_tmp"