Skip to content

Commit

Permalink
Merge pull request #9 from turboflo/8-add-documentation
Browse files Browse the repository at this point in the history
8 add documentation
  • Loading branch information
turboflo committed Sep 1, 2024
2 parents f4daf00 + 2a05834 commit 05b91a4
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 38 deletions.
Empty file.
14 changes: 11 additions & 3 deletions src/horizon_match/application/interfaces/comparison_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,24 @@


class ComparisonService(ABC):
"""Abstract base class for comparison services.
This class defines an interface for comparing two project descriptions. Concrete implementations
should provide the specifics of how the comparison is performed and how the results are generated.
Methods:
compare (str, str) -> Comparison: Compare two project descriptions and return a Comparison object.
"""

@abstractmethod
def compare(self, my_project: str, existing_project: str) -> Comparison:
"""
Compare two project descriptions and return a ComparisonResult.
"""Compare two project descriptions and return a Comparison object.
Args:
my_project (str): The description of the user's project idea.
existing_project (str): The description of an existing project to compare against.
Returns:
ComparisonResult: An object containing the comparison details.
Comparison: An object containing the comparison details.
"""
pass
13 changes: 11 additions & 2 deletions src/horizon_match/application/interfaces/vector_search_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,19 @@


class VectorSearchService(ABC):
"""Abstract base class for vector search services.
This class defines an interface for performing vector-based searches to find projects similar
to a given query. Concrete implementations should provide the specifics of how the search
is executed and how the results are retrieved.
Methods:
search (str, int) -> List[Project]: Perform a vector search for similar projects based on the given query.
"""

@abstractmethod
def search(self, query: str, k: int) -> List[Project]:
"""
Perform a vector search for similar projects based on the given query.
"""Perform a vector search for similar projects based on the given query.
Args:
query (str): The project description or search query.
Expand Down
38 changes: 25 additions & 13 deletions src/horizon_match/application/use_cases/compare_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,53 @@


class CompareProjects:
"""Handles the comparison of a query project against a set of similar projects.
This class uses a vector search service to find projects similar to the query and then uses
a comparison service to evaluate the similarity of each found project. Results are sorted
by similarity score.
Attributes:
vector_search_service (VectorSearchService): Service for performing vector-based searches to find similar projects.
comparison_service (ComparisonService): Service for comparing project descriptions to determine similarity.
"""

def __init__(
self,
vector_search_service: VectorSearchService,
comparison_service: ComparisonService,
):
) -> None:
"""Initialize CompareProjects with vector search and comparison services.
Args:
vector_search_service (VectorSearchService): The service to search for similar projects.
comparison_service (ComparisonService): The service to compare project descriptions.
"""
self.vector_search_service = vector_search_service
self.comparison_service = comparison_service

def execute(self, query: str, k: int) -> List[HorizonMatchResult]:
"""
Execute the project comparison use case.
This method performs a vector search to find similar projects,
then compares each result with the input query.
"""Perform the comparison of the query project with similar projects.
Args:
query (str): The project description or search query.
query (str): The description of the query project to compare.
k (int): The number of similar projects to retrieve and compare.
Returns:
List[ComparisonResult]: A list of comparison results for the most similar projects.
List[HorizonMatchResult]: A list of HorizonMatchResult objects, sorted by similarity score in descending order.
"""

# Perform vector search to find similar projects
similar_projects = self.vector_search_service.search(query, k)

# Compare the query with each similar project
results = []
results: List[HorizonMatchResult] = []
for project in similar_projects:
# Update state to show which project is being compared 1/k
# Compare the query description with the project description
comparison = self.comparison_service.compare(query, project.description)
result = HorizonMatchResult(project, comparison)
result = HorizonMatchResult(project=project, comparison=comparison)
results.append(result)

# Sort project by AI similarity score
# Sort results by AI similarity score in descending order
results.sort(key=lambda x: x.comparison.score, reverse=True)

return results
Empty file.
23 changes: 17 additions & 6 deletions src/horizon_match/domain/entities/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,38 @@


class Comparison(BaseModel):
"""Represents the result of a comparison between two projects.
Attributes:
summary (str): A concise, one-sentence summary of the existing EU Horizon project, highlighting its primary objectives and key innovations.
similarity (str): Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders.
difference (str): Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives.
score (float): Similarity score ranging from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects.
confidence (float): Confidence score ranging from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis.
reason (str): Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions.
"""

summary: str = Field(
...,
description="Concise, one-sentence summary of the existing EU Horizon project, highlighting primary objectives and key innovations",
description="Concise, one-sentence summary of the existing EU Horizon project, highlighting primary objectives and key innovations.",
)
similarity: str = Field(
...,
description="Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders",
description="Detailed analysis of significant commonalities between the projects, considering research goals, methodologies, technological focus, impacts, and stakeholders.",
)
difference: str = Field(
...,
description="Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives",
description="Comprehensive examination of notable distinctions between the projects, addressing scope, techniques, innovations, geographic focus, and alignment with EU Horizon objectives.",
)
score: float = Field(
...,
description="Similarity score from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects",
description="Similarity score from 0 to 1 (with two decimal places precision), where 0 indicates no similarity and 1 indicates identical projects.",
)
confidence: float = Field(
...,
description="Confidence score from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis",
description="Confidence score from 0 to 1 (with two decimal places precision), where 0 indicates low confidence and 1 indicates high confidence in the accuracy and reliability of the similarity score and analysis.",
)
reason: str = Field(
...,
description="Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions",
description="Thorough, evidence-based justification for the assigned similarity and confidence scores, referencing specific elements from both project descriptions.",
)
7 changes: 7 additions & 0 deletions src/horizon_match/domain/entities/horizon_match_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,12 @@

@dataclass
class HorizonMatchResult:
    """Pair a candidate project with the outcome of comparing it to a query.

    The CompareProjects use case builds one instance per project returned by
    the vector search and sorts the instances by ``comparison.score`` in
    descending order.

    Attributes:
        project (Project): The existing project that was compared against the
            query description.
        comparison (Comparison): The comparison produced for this project,
            including the similarity score and the supporting analysis.
    """

    project: Project
    comparison: Comparison
12 changes: 11 additions & 1 deletion src/horizon_match/domain/entities/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,22 @@


class Project(BaseModel):
"""Represents a project with details for comparison and indexing.
Attributes:
id (str): Unique identifier for the project.
description (str): Detailed description of the project.
title (Optional[str]): Title of the project. Defaults to None.
content_update_date (Optional[str]): Date when the project content was last updated. Defaults to None.
similarity (Optional[float]): Similarity score of the project compared to another project. Defaults to None.
"""

id: str = Field(..., description="Unique identifier for the project")
description: str = Field(..., description="Detailed description of the project")
title: Optional[str] = Field(None, description="Title of the project")
content_update_date: Optional[str] = Field(
None, description="Date when the project content was last updated"
)
similarity: Optional[float] = Field(
None, description="Simmilarity score of the project"
None, description="Similarity score of the project compared to another project"
)
Empty file.
32 changes: 30 additions & 2 deletions src/horizon_match/infrastructure/config/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,34 @@


class ConfigManager:
def __init__(self, config_path: str = "config.yml"):
"""Manages configuration settings loaded from a YAML file and environment variables.
This class reads configuration settings from a YAML file and allows access to those settings.
It also supports interpolation of environment variables in the configuration values.
Attributes:
config (Dict[str, Any]): The configuration settings loaded from the YAML file.
"""

def __init__(self, config_path: str = "config.yml") -> None:
"""Initialize ConfigManager with configuration settings.
Args:
config_path (str): Path to the YAML configuration file. Defaults to "config.yml".
"""
load_dotenv()

with open(config_path, "r") as config_file:
self.config = yaml.safe_load(config_file)

self._process_config(self.config)

def _process_config(self, config: Dict[str, Any]):
def _process_config(self, config: Dict[str, Any]) -> None:
"""Process the configuration dictionary to substitute environment variables.
Args:
config (Dict[str, Any]): The configuration dictionary to process.
"""
for key, value in config.items():
if isinstance(value, dict):
self._process_config(value)
Expand All @@ -24,6 +43,15 @@ def _process_config(self, config: Dict[str, Any]):
config[key] = os.getenv(env_var, value)

def get(self, *keys: str, default: Any = None) -> Any:
"""Retrieve a configuration value based on a sequence of keys.
Args:
*keys (str): The sequence of keys to access nested values in the configuration.
default (Any): The default value to return if the key path is not found. Defaults to None.
Returns:
Any: The configuration value associated with the provided keys, or the default value if not found.
"""
result = self.config
for key in keys:
if isinstance(result, dict):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
from openai import OpenAI
from typing import List
from typing import List, Dict
from horizon_match.application.interfaces.comparison_service import ComparisonService
from horizon_match.domain.entities.comparison import Comparison
from horizon_match.infrastructure.config.config_manager import ConfigManager
Expand All @@ -8,7 +9,23 @@


class OpenAIComparisonService(ComparisonService):
def __init__(self, config: ConfigManager):
"""Service for comparing project descriptions using OpenAI's language model.
This service uses OpenAI to generate detailed comparisons between two project descriptions,
providing insights into their similarities and differences.
Attributes:
config (ConfigManager): Configuration manager for accessing API keys and model information.
client (OpenAI): OpenAI client for generating comparisons.
model (str): The model used for generating the comparison.
"""

def __init__(self, config: ConfigManager) -> None:
"""Initialize OpenAIComparisonService with configuration settings.
Args:
config (ConfigManager): Configuration manager for the service.
"""
self.config = config
openai_api_key = self.config.get(
"horizon-match", "comparison-service", "api_key"
Expand All @@ -17,6 +34,18 @@ def __init__(self, config: ConfigManager):
self.model = self.config.get("horizon-match", "comparison-service", "model")

def compare(self, my_project: str, existing_project: str) -> Comparison:
"""Compare two project descriptions using OpenAI.
Args:
my_project (str): Description of the user's project.
existing_project (str): Description of the existing project.
Returns:
Comparison: A Comparison object containing the results of the comparison.
Raises:
ValueError: If either project description is empty or exceeds the maximum length.
"""
self._validate_input(my_project, "My project")
self._validate_input(existing_project, "Existing project")

Expand All @@ -31,7 +60,16 @@ def compare(self, my_project: str, existing_project: str) -> Comparison:
response_content = completion.choices[0].message.content
return Comparison.model_validate_json(response_content)

def _validate_input(self, project: str, project_name: str):
def _validate_input(self, project: str, project_name: str) -> None:
"""Validate the project description input.
Args:
project (str): The project description to validate.
project_name (str): The name of the project (for error messages).
Raises:
ValueError: If the project description is empty or exceeds the maximum length.
"""
if not project.strip():
raise ValueError(f"{project_name} description cannot be empty")
if len(project) > MAX_PROJECT_LENGTH:
Expand All @@ -41,7 +79,16 @@ def _validate_input(self, project: str, project_name: str):

def _create_comparison_prompt(
self, my_project: str, existing_project: str
) -> List[dict[str, str]]:
) -> List[Dict[str, str]]:
"""Create the prompt for the comparison model.
Args:
my_project (str): Description of the user's project.
existing_project (str): Description of the existing project.
Returns:
List[Dict[str, str]]: A list of messages forming the prompt for the OpenAI model.
"""
return [
{
"role": "system",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import annotations
from pinecone import Pinecone
from openai import OpenAI
from typing import List
Expand All @@ -10,7 +11,23 @@


class PineconeSearchService(VectorSearchService):
def __init__(self, config: ConfigManager):
"""Service for searching and indexing projects using Pinecone and OpenAI.
This service integrates Pinecone for vector-based search and OpenAI for generating embeddings.
Attributes:
config (ConfigManager): Configuration manager for accessing API keys and model information.
index (Pinecone.Index): Pinecone index for storing and querying project embeddings.
openai_client (OpenAI): OpenAI client for generating embeddings.
embedding_model (str): The model used for generating embeddings.
"""

def __init__(self, config: ConfigManager) -> None:
"""Initialize PineconeSearchService with configuration settings.
Args:
config (ConfigManager): Configuration manager for the service.
"""
self.config = config
# Initialize Pinecone
pinecone_api_key = self.config.get(
Expand All @@ -31,6 +48,15 @@ def __init__(self, config: ConfigManager):
)

def search(self, query: str, k: int) -> List[Project]:
"""Search for projects matching the query.
Args:
query (str): The query string to search for.
k (int): The number of top results to return.
Returns:
List[Project]: A list of projects matching the query.
"""
# Generate embedding for the query
embedding_response = self.openai_client.embeddings.create(
model=self.embedding_model, input=query
Expand All @@ -55,7 +81,12 @@ def search(self, query: str, k: int) -> List[Project]:
projects.append(project)
return projects

def index_project(self, project: Project):
def index_project(self, project: Project) -> None:
"""Index a project in Pinecone.
Args:
project (Project): The project to be indexed.
"""
# Generate embedding for the project description
embedding_response = self.openai_client.embeddings.create(
model=self.embedding_model, input=project.description
Expand Down
Loading

0 comments on commit 05b91a4

Please sign in to comment.