Merge pull request #9 from turboflo/8-add-documentation

8 add documentation
turboflo · Sep 1, 2024 · 05b91a4 · 05b91a4
2 parents f4daf00 + 2a05834
commit 05b91a4
Show file tree

Hide file tree

Showing 13 changed files with 256 additions and 38 deletions.
diff --git a/src/horizon_match/application/__init__.py b/src/horizon_match/application/__init__.py
diff --git a/src/horizon_match/application/interfaces/comparison_service.py b/src/horizon_match/application/interfaces/comparison_service.py
@@ -3,16 +3,24 @@
 
 
 class ComparisonService(ABC):
+    """Abstract base class for comparison services.
+
+    This class defines an interface for comparing two project descriptions. Concrete implementations
+    should provide the specifics of how the comparison is performed and how the results are generated.
+
+    Methods:
+        compare (str, str) -> Comparison: Compare two project descriptions and return a Comparison object.
+    """
+
     @abstractmethod
     def compare(self, my_project: str, existing_project: str) -> Comparison:
-        """
-        Compare two project descriptions and return a ComparisonResult.
+        """Compare two project descriptions and return a Comparison object.
 
         Args:
             my_project (str): The description of the user's project idea.
             existing_project (str): The description of an existing project to compare against.
 
         Returns:
-            ComparisonResult: An object containing the comparison details.
+            Comparison: An object containing the comparison details.
         """
         pass
diff --git a/src/horizon_match/application/interfaces/vector_search_service.py b/src/horizon_match/application/interfaces/vector_search_service.py
@@ -4,10 +4,19 @@
 
 
 class VectorSearchService(ABC):
+    """Abstract base class for vector search services.
+
+    This class defines an interface for performing vector-based searches to find projects similar
+    to a given query. Concrete implementations should provide the specifics of how the search
+    is executed and how the results are retrieved.
+
+    Methods:
+        search (str, int) -> List[Project]: Perform a vector search for similar projects based on the given query.
+    """
+
     @abstractmethod
     def search(self, query: str, k: int) -> List[Project]:
-        """
-        Perform a vector search for similar projects based on the given query.
+        """Perform a vector search for similar projects based on the given query.
 
         Args:
             query (str): The project description or search query.

diff --git a/src/horizon_match/application/use_cases/compare_projects.py b/src/horizon_match/application/use_cases/compare_projects.py
@@ -7,41 +7,53 @@
 
 
 class CompareProjects:
+    """Handles the comparison of a query project against a set of similar projects.
+
+    This class uses a vector search service to find projects similar to the query and then uses
+    a comparison service to evaluate the similarity of each found project. Results are sorted
+    by similarity score.
+
+    Attributes:
+        vector_search_service (VectorSearchService): Service for performing vector-based searches to find similar projects.
+        comparison_service (ComparisonService): Service for comparing project descriptions to determine similarity.
+    """
+
     def __init__(
         self,
         vector_search_service: VectorSearchService,
         comparison_service: ComparisonService,
-    ):
+    ) -> None:
+        """Initialize CompareProjects with vector search and comparison services.
+
+        Args:
+            vector_search_service (VectorSearchService): The service to search for similar projects.
+            comparison_service (ComparisonService): The service to compare project descriptions.
+        """
         self.vector_search_service = vector_search_service
         self.comparison_service = comparison_service
 
     def execute(self, query: str, k: int) -> List[HorizonMatchResult]:
-        """
-        Execute the project comparison use case.
-
-        This method performs a vector search to find similar projects,
-        then compares each result with the input query.
+        """Perform the comparison of the query project with similar projects.
 
         Args:
-            query (str): The project description or search query.
+            query (str): The description of the query project to compare.
             k (int): The number of similar projects to retrieve and compare.
 
         Returns:
-            List[ComparisonResult]: A list of comparison results for the most similar projects.
+            List[HorizonMatchResult]: A list of HorizonMatchResult objects, sorted by similarity score in descending order.
         """
-
         # Perform vector search to find similar projects
         similar_projects = self.vector_search_service.search(query, k)
 
         # Compare the query with each similar project
-        results = []
+        results: List[HorizonMatchResult] = []
         for project in similar_projects:
-            # Update state to show which project is being compared 1/k
+            # Compare the query description with the project description
             comparison = self.comparison_service.compare(query, project.description)
-            result = HorizonMatchResult(project, comparison)
+            result = HorizonMatchResult(project=project, comparison=comparison)
             results.append(result)
 
-        # Sort project by AI similarity score
+        # Sort results by AI similarity score in descending order
         results.sort(key=lambda x: x.comparison.score, reverse=True)
 
         return results
diff --git a/src/horizon_match/domain/__init__.py b/src/horizon_match/domain/__init__.py
diff --git a/src/horizon_match/domain/entities/comparison.py b/src/horizon_match/domain/entities/comparison.py
@@ -2,27 +2,38 @@
 
 
 class Comparison(BaseModel):
+    """Represents the result of a comparison between two projects.
+
+    Attributes:
+        summary (str): A concise, one-sentence summary of the existing EU Horizon project, highlighting its primary objectives and key innovations.
+        similarity (str): Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders.
+        difference (str): Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives.
+        score (float): Similarity score ranging from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects.
+        confidence (float): Confidence score ranging from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis.
+        reason (str): Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions.
+    """
+
     summary: str = Field(
         ...,
-        description="Concise, one-sentence summary of the existing EU Horizon project, highlighting primary objectives and key innovations",
+        description="Concise, one-sentence summary of the existing EU Horizon project, highlighting primary objectives and key innovations.",
     )
     similarity: str = Field(
         ...,
-        description="Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders",
+        description="Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders.",
     )
     difference: str = Field(
         ...,
-        description="Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives",
+        description="Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives.",
     )
     score: float = Field(
         ...,
-        description="Similarity score from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects",
+        description="Similarity score from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects.",
     )
     confidence: float = Field(
         ...,
-        description="Confidence score from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis",
+        description="Confidence score from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis.",
     )
     reason: str = Field(
         ...,
-        description="Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions",
+        description="Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions.",
     )
diff --git a/src/horizon_match/domain/entities/horizon_match_result.py b/src/horizon_match/domain/entities/horizon_match_result.py
@@ -5,5 +5,12 @@
 
 @dataclass
 class HorizonMatchResult:
+    """Pairs a project returned by the vector search with the result of comparing it against the query.
+
+    Attributes:
+        project (Project): The existing project that the query was compared against.
+        comparison (Comparison): The result of the comparison, including similarity and analysis details.
+    """
+
     project: Project
     comparison: Comparison
diff --git a/src/horizon_match/domain/entities/project.py b/src/horizon_match/domain/entities/project.py
@@ -3,12 +3,22 @@
 
 
 class Project(BaseModel):
+    """Represents a project with details for comparison and indexing.
+
+    Attributes:
+        id (str): Unique identifier for the project.
+        description (str): Detailed description of the project.
+        title (Optional[str]): Title of the project. Defaults to None.
+        content_update_date (Optional[str]): Date when the project content was last updated. Defaults to None.
+        similarity (Optional[float]): Similarity score of the project compared to another project. Defaults to None.
+    """
+
     id: str = Field(..., description="Unique identifier for the project")
     description: str = Field(..., description="Detailed description of the project")
     title: Optional[str] = Field(None, description="Title of the project")
     content_update_date: Optional[str] = Field(
         None, description="Date when the project content was last updated"
     )
     similarity: Optional[float] = Field(
-        None, description="Simmilarity score of the project"
+        None, description="Similarity score of the project compared to another project"
     )
diff --git a/src/horizon_match/infrastructure/__init__.py b/src/horizon_match/infrastructure/__init__.py
diff --git a/src/horizon_match/infrastructure/config/config_manager.py b/src/horizon_match/infrastructure/config/config_manager.py
@@ -5,15 +5,34 @@
 
 
 class ConfigManager:
-    def __init__(self, config_path: str = "config.yml"):
+    """Manages configuration settings loaded from a YAML file and environment variables.
+
+    This class reads configuration settings from a YAML file and allows access to those settings.
+    It also supports interpolation of environment variables in the configuration values.
+
+    Attributes:
+        config (Dict[str, Any]): The configuration settings loaded from the YAML file.
+    """
+
+    def __init__(self, config_path: str = "config.yml") -> None:
+        """Initialize ConfigManager with configuration settings.
+
+        Args:
+            config_path (str): Path to the YAML configuration file. Defaults to "config.yml".
+        """
         load_dotenv()
 
         with open(config_path, "r") as config_file:
             self.config = yaml.safe_load(config_file)
 
         self._process_config(self.config)
 
-    def _process_config(self, config: Dict[str, Any]):
+    def _process_config(self, config: Dict[str, Any]) -> None:
+        """Process the configuration dictionary to substitute environment variables.
+
+        Args:
+            config (Dict[str, Any]): The configuration dictionary to process.
+        """
         for key, value in config.items():
             if isinstance(value, dict):
                 self._process_config(value)
@@ -24,6 +43,15 @@ def _process_config(self, config: Dict[str, Any]):
                 config[key] = os.getenv(env_var, value)
 
     def get(self, *keys: str, default: Any = None) -> Any:
+        """Retrieve a configuration value based on a sequence of keys.
+
+        Args:
+            *keys (str): The sequence of keys to access nested values in the configuration.
+            default (Any): The default value to return if the key path is not found. Defaults to None.
+
+        Returns:
+            Any: The configuration value associated with the provided keys, or the default value if not found.
+        """
         result = self.config
         for key in keys:
             if isinstance(result, dict):

diff --git a/src/horizon_match/infrastructure/services/openai_comparison_service.py b/src/horizon_match/infrastructure/services/openai_comparison_service.py
@@ -1,5 +1,6 @@
+from __future__ import annotations
 from openai import OpenAI
-from typing import List
+from typing import List, Dict
 from horizon_match.application.interfaces.comparison_service import ComparisonService
 from horizon_match.domain.entities.comparison import Comparison
 from horizon_match.infrastructure.config.config_manager import ConfigManager
@@ -8,7 +9,23 @@
 
 
 class OpenAIComparisonService(ComparisonService):
-    def __init__(self, config: ConfigManager):
+    """Service for comparing project descriptions using OpenAI's language model.
+
+    This service uses OpenAI to generate detailed comparisons between two project descriptions,
+    providing insights into their similarities and differences.
+
+    Attributes:
+        config (ConfigManager): Configuration manager for accessing API keys and model information.
+        client (OpenAI): OpenAI client for generating comparisons.
+        model (str): The model used for generating the comparison.
+    """
+
+    def __init__(self, config: ConfigManager) -> None:
+        """Initialize OpenAIComparisonService with configuration settings.
+
+        Args:
+            config (ConfigManager): Configuration manager for the service.
+        """
         self.config = config
         openai_api_key = self.config.get(
             "horizon-match", "comparison-service", "api_key"
@@ -17,6 +34,18 @@ def __init__(self, config: ConfigManager):
         self.model = self.config.get("horizon-match", "comparison-service", "model")
 
     def compare(self, my_project: str, existing_project: str) -> Comparison:
+        """Compare two project descriptions using OpenAI.
+
+        Args:
+            my_project (str): Description of the user's project.
+            existing_project (str): Description of the existing project.
+
+        Returns:
+            Comparison: A Comparison object containing the results of the comparison.
+
+        Raises:
+            ValueError: If either project description is empty or exceeds the maximum length.
+        """
         self._validate_input(my_project, "My project")
         self._validate_input(existing_project, "Existing project")
 
@@ -31,7 +60,16 @@ def compare(self, my_project: str, existing_project: str) -> Comparison:
         response_content = completion.choices[0].message.content
         return Comparison.model_validate_json(response_content)
 
-    def _validate_input(self, project: str, project_name: str):
+    def _validate_input(self, project: str, project_name: str) -> None:
+        """Validate the project description input.
+
+        Args:
+            project (str): The project description to validate.
+            project_name (str): A label identifying which description is being validated (e.g. "My project"), used as the prefix of error messages.
+
+        Raises:
+            ValueError: If the project description is empty or exceeds the maximum length.
+        """
         if not project.strip():
             raise ValueError(f"{project_name} description cannot be empty")
         if len(project) > MAX_PROJECT_LENGTH:
@@ -41,7 +79,16 @@ def _validate_input(self, project: str, project_name: str):
 
     def _create_comparison_prompt(
         self, my_project: str, existing_project: str
-    ) -> List[dict[str, str]]:
+    ) -> List[Dict[str, str]]:
+        """Create the prompt for the comparison model.
+
+        Args:
+            my_project (str): Description of the user's project.
+            existing_project (str): Description of the existing project.
+
+        Returns:
+            List[Dict[str, str]]: A list of messages forming the prompt for the OpenAI model.
+        """
         return [
             {
                 "role": "system",

diff --git a/src/horizon_match/infrastructure/services/pinecone_search_service.py b/src/horizon_match/infrastructure/services/pinecone_search_service.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from pinecone import Pinecone
 from openai import OpenAI
 from typing import List
@@ -10,7 +11,23 @@
 
 
 class PineconeSearchService(VectorSearchService):
-    def __init__(self, config: ConfigManager):
+    """Service for searching and indexing projects using Pinecone and OpenAI.
+
+    This service integrates Pinecone for vector-based search and OpenAI for generating embeddings.
+
+    Attributes:
+        config (ConfigManager): Configuration manager for accessing API keys and model information.
+        index (Pinecone.Index): Pinecone index for storing and querying project embeddings.
+        openai_client (OpenAI): OpenAI client for generating embeddings.
+        embedding_model (str): The model used for generating embeddings.
+    """
+
+    def __init__(self, config: ConfigManager) -> None:
+        """Initialize PineconeSearchService with configuration settings.
+
+        Args:
+            config (ConfigManager): Configuration manager for the service.
+        """
         self.config = config
         # Initialize Pinecone
         pinecone_api_key = self.config.get(
@@ -31,6 +48,15 @@ def __init__(self, config: ConfigManager):
         )
 
     def search(self, query: str, k: int) -> List[Project]:
+        """Search for projects matching the query.
+
+        Args:
+            query (str): The query string to search for.
+            k (int): The number of top results to return.
+
+        Returns:
+            List[Project]: A list of projects matching the query.
+        """
         # Generate embedding for the query
         embedding_response = self.openai_client.embeddings.create(
             model=self.embedding_model, input=query
@@ -55,7 +81,12 @@ def search(self, query: str, k: int) -> List[Project]:
             projects.append(project)
         return projects
 
-    def index_project(self, project: Project):
+    def index_project(self, project: Project) -> None:
+        """Index a project in Pinecone.
+
+        Args:
+            project (Project): The project to be indexed.
+        """
         # Generate embedding for the project description
         embedding_response = self.openai_client.embeddings.create(
             model=self.embedding_model, input=project.description