Skip to content

Commit

Permalink
update prompt (#1137)
Browse files (browse the repository at this point in the history)
* update prompt

* test and fix. Add dockerfile.dev

* up
  • Loading branch information
shreyaspimpalgaonkar authored Sep 12, 2024
1 parent 772eb93 commit d762261
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 12 deletions.
1 change: 1 addition & 0 deletions py/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ ARG PORT=8000
ARG HOST=0.0.0.0
ENV PORT=$PORT HOST=$HOST
EXPOSE $PORT
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

# Run the application
CMD ["sh", "-c", "uvicorn core.main.app_entry:app --host $HOST --port $PORT"]
49 changes: 49 additions & 0 deletions py/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Development image for the R2R server, built in two stages:
#   1. "builder" installs the build toolchain and resolves the Python
#      dependencies declared in pyproject.toml into the system interpreter.
#   2. The final stage installs only runtime system packages, copies the
#      installed Python packages across, and adds the application source.
FROM python:3.10-slim AS builder
# Install system build dependencies: compilers and headers for native Python
# wheels, plus the Tesseract/Leptonica, Poppler, libmagic and GL/GLib
# libraries (needed for Unstructured and OpenCV).
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \
tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
poppler-utils libmagic1 \
libgl1-mesa-glx libglib2.0-0 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Poetry is installed unpinned, so the version used depends on build time.
RUN pip install --no-cache-dir poetry

# Only pyproject.toml is copied into the builder, so this dependency layer is
# rebuilt when pyproject.toml changes rather than on every source edit.
RUN mkdir -p /app/py
WORKDIR /app/py
COPY pyproject.toml /app/py/pyproject.toml

# Install the dependencies straight into the system site-packages (no
# virtualenv) so they can be copied to the final stage. Only the "core" extra
# is installed, without dev dependencies and without the project itself
# (--no-root); gunicorn and uvicorn are then added with pip.
# NOTE(review): `--no-dev` is a deprecated Poetry option and Poetry is not
# pinned above; confirm it is still accepted by the installed version
# (the documented replacement is `--only main` / `--without dev`).
RUN poetry config virtualenvs.create false \
&& poetry install --extras "core" --no-dev --no-root \
&& pip install --no-cache-dir gunicorn uvicorn
# Create the final image
FROM python:3.10-slim
# Install runtime dependencies only (no compilers or -dev headers).
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
curl tesseract-ocr poppler-utils libmagic1 \
libgl1-mesa-glx libglib2.0-0 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Bring over the Python packages and console scripts installed in the builder.
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Run Unstructured's model initialisation at build time.
# NOTE(review): presumably this pre-fetches model assets so the first request
# does not pay that cost; confirm against the unstructured package.
RUN python -c "from unstructured.partition.model_init import initialize; initialize()"


# Expose the port and set environment variables. PORT and HOST can be
# overridden with --build-arg; TESSDATA_PREFIX points Tesseract at its
# language data directory.
# NOTE(review): declaring several variables in one ARG instruction may be
# rejected by older Docker builders; confirm the minimum supported version.
ARG PORT=8000 HOST=0.0.0.0
ENV PORT=$PORT HOST=$HOST TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
EXPOSE $PORT

# Copy the entire build context into the image.
COPY . /app
# Copy the application and config.
# NOTE(review): these paths appear to be covered already by `COPY . /app`
# above, which would make the four lines below redundant; confirm.
COPY core /app/core
COPY r2r /app/r2r
COPY r2r.toml /app/r2r.toml
COPY pyproject.toml /app/pyproject.toml

# Launch through `sh -c` so $HOST and $PORT are expanded by the shell when the
# container starts (exec-form CMD does not perform variable substitution).
CMD ["sh", "-c", "uvicorn core.main.app_entry:app --host $HOST --port $PORT"]
10 changes: 9 additions & 1 deletion py/cli/commands/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ def generate_report():
default=False,
help="Use the unstructured Docker image",
)
@click.option(
"--dev",
is_flag=True,
default=False,
help="Run in development mode",
)
def serve(
host,
port,
Expand All @@ -228,6 +234,7 @@ def serve(
config_path,
build,
unstructured,
dev,
):
"""Start the R2R server."""
load_dotenv()
Expand Down Expand Up @@ -280,7 +287,7 @@ def serve(
"-t",
image,
"-f",
"Dockerfile",
f"Dockerfile{'.dev' if dev else ''}",
".",
],
check=True,
Expand Down Expand Up @@ -308,6 +315,7 @@ def serve(
image,
config_name,
config_path,

)
if (
"pytest" in sys.modules
Expand Down
4 changes: 3 additions & 1 deletion py/core/base/abstractions/prompt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Abstraction for a prompt that can be formatted with inputs."""

from typing import Any

from datetime import datetime
from pydantic import BaseModel


Expand All @@ -10,6 +10,8 @@ class Prompt(BaseModel):

name: str
template: str
created_at: datetime = datetime.now()
updated_at: datetime = datetime.now()
input_types: dict[str, str]

def format_prompt(self, inputs: dict[str, Any]) -> str:
Expand Down
2 changes: 2 additions & 0 deletions py/core/base/api/models/management/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class UpdatePromptResponse(BaseModel):
class PromptResponse(BaseModel):
name: str
template: str
created_at: datetime
updated_at: datetime
input_types: Dict[str, str]


Expand Down
36 changes: 26 additions & 10 deletions py/core/providers/prompts/r2r_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from pathlib import Path
from typing import Any, Optional
from uuid import uuid4
from uuid import uuid5, NAMESPACE_DNS

import yaml
from sqlalchemy import text
Expand Down Expand Up @@ -55,15 +55,19 @@ def create_table(self):
def _load_prompts_from_database(self):
query = text(
f"""
SELECT prompt_id, name, template, input_types
SELECT prompt_id, name, template, input_types, created_at, updated_at
FROM {self._get_table_name('prompts')}
"""
)
results = self.execute_query(query).fetchall()
for row in results:
prompt_id, name, template, input_types = row
prompt_id, name, template, input_types, created_at, updated_at = row
self.prompts[name] = Prompt(
name=name, template=template, input_types=input_types
name=name,
template=template,
input_types=input_types,
created_at=created_at,
updated_at=updated_at,
)

def _load_prompts_from_yaml_directory(
Expand All @@ -88,11 +92,18 @@ def _load_prompts_from_yaml_directory(
data = yaml.safe_load(file)
for name, prompt_data in data.items():
if name not in self.prompts:
modify_prompt = True
else:
modify_prompt = self.prompts[name].created_at == self.prompts[name].updated_at

if modify_prompt:
self.add_prompt(
name,
prompt_data["template"],
prompt_data.get("input_types", {}),
modify_created_at = True,
)

except yaml.YAMLError as e:
error_msg = (
f"Error loading prompts from YAML file {yaml_file}: {e}"
Expand All @@ -105,13 +116,11 @@ def _load_prompts_from_yaml_directory(
raise ValueError(error_msg)

def add_prompt(
self, name: str, template: str, input_types: dict[str, str]
self, name: str, template: str, input_types: dict[str, str], modify_created_at: bool = False
) -> None:
if name in self.prompts:
raise ValueError(f"Prompt '{name}' already exists.")
prompt = Prompt(name=name, template=template, input_types=input_types)
self.prompts[name] = prompt
self._save_prompt_to_database(prompt)
self._save_prompt_to_database(prompt, modify_created_at=modify_created_at)

def get_prompt(
self,
Expand Down Expand Up @@ -165,7 +174,13 @@ def delete_prompt(self, name: str) -> None:
)
self.execute_query(query, {"name": name})

def _save_prompt_to_database(self, prompt: Prompt):
def _save_prompt_to_database(self, prompt: Prompt, modify_created_at: bool = False):

if modify_created_at:
modify_created_at_clause = "created_at = NOW(),"
else:
modify_created_at_clause = ""

query = text(
f"""
INSERT INTO {self._get_table_name('prompts')}
Expand All @@ -174,13 +189,14 @@ def _save_prompt_to_database(self, prompt: Prompt):
ON CONFLICT (name) DO UPDATE SET
template = EXCLUDED.template,
input_types = EXCLUDED.input_types,
{modify_created_at_clause}
updated_at = NOW();
"""
)
result = self.execute_query(
query,
{
"prompt_id": uuid4(),
"prompt_id": uuid5(NAMESPACE_DNS, prompt.name),
"name": prompt.name,
"template": prompt.template,
"input_types": json.dumps(prompt.input_types),
Expand Down

0 comments on commit d762261

Please sign in to comment.