Merge pull request #334 from RamiAwar/add-posthog-anonymous-analytics
feat: Added Posthog context manager, tracked events
RamiAwar committed Sep 23, 2024
2 parents 9a87143 + 7e14c43 commit 4325472
Showing 24 changed files with 295 additions and 31 deletions.
13 changes: 11 additions & 2 deletions README.md
@@ -20,8 +20,8 @@
</div>

## 🍿 Watch a quick demo
<a href="https://youtu.be/NN99OTVy7uA"><img src="https://github.com/user-attachments/assets/34dfba7c-7ab5-4a35-8fe1-e40b298ef1ae" height="300" alt="DataLine logo"></a>

<a href="https://youtu.be/NN99OTVy7uA"><img src="https://github.com/user-attachments/assets/34dfba7c-7ab5-4a35-8fe1-e40b298ef1ae" height="300" alt="DataLine logo"></a>

---

@@ -78,6 +78,9 @@ But you can still influence the direction we go in. We're building this for you,
- [x] Querying data files like CSV, [Excel](#excel-support), SQLite, sas7bdat (more connection types)
- [x] Charting via natural language
- [x] Modifying chart queries and re-rendering/refreshing charts
- [ ] Dashboards and triggers
- [ ] Knowledge base and 'trainable' examples (flavor of RAG)
- [ ] More advanced charting options (bubble, stacks, etc.)

And a lot more is coming soon. You can still influence what we build, so if you're a user and you're down for it, we'd love to interview you! Book some time with one of us here:

@@ -139,6 +142,12 @@ To connect to the frontend, you can then visit:

#### Running manually

Feeling spicy, are we? 🌶️
There are a few things you should know first. DataLine is split into two parts: the backend and the frontend.

The backend is a Python FastAPI server, and the frontend is a React app.
The frontend also includes our landing page, so you'll need to set up an env var before it will run!

Check the [backend](./backend/README.md) and [frontend](./frontend/README.md) readmes.
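As a minimal sketch of what a manual run can look like (the dependency manager, exact commands, and the env var itself are assumptions here; the linked readmes are the source of truth):

```bash
# Backend: a FastAPI app (the module path dataline.main:app is taken from backend/dataline/main.py in this repo)
cd backend
pip install -r requirements.txt   # assumed; use whatever the backend readme specifies
uvicorn dataline.main:app --port 7377

# Frontend: a React app; set the env var described in frontend/README.md first
cd ../frontend
npm install
npm run dev
```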

## Authentication
@@ -182,11 +191,11 @@ To do this, add the environment variable `ALLOWED_ORIGINS` (comma separated list
By default, it is set to `http://localhost:7377,http://0.0.0.0:7377` to make it work with local Docker and local binaries.

For example, running the Docker image on a remote server with IP `123.123.12.34`:

```bash
docker run -p 7377:7377 -v dataline:/home/.dataline --name dataline -e ALLOWED_ORIGINS="http://123.123.12.34:7377,https://123.123.12.34:7377" ramiawar/dataline:latest
```


### Excel Support

We support Excel files, but for the time being they have to conform to some structure. We also support multiple sheets: each sheet is ingested as a separate table.
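To illustrate the sheet-per-table idea only (this is a pandas sketch, not DataLine's actual ingestion code; the file names and SQLite target are made up):

```python
# Sketch: load every sheet of a workbook into its own SQLite table.
import sqlite3

import pandas as pd

conn = sqlite3.connect("workbook.db")  # hypothetical target database
sheets = pd.read_excel("data.xlsx", sheet_name=None)  # sheet_name=None -> {sheet name: DataFrame}
for name, frame in sheets.items():
    frame.to_sql(name, conn, if_exists="replace", index=False)
conn.close()
```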
@@ -0,0 +1,29 @@
"""added user analytics
Revision ID: 3d35dcd30116
Revises: fa9cefccac47
Create Date: 2024-09-21 23:21:13.944649
"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "3d35dcd30116"
down_revision: Union[str, None] = "fa9cefccac47"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    with op.batch_alter_table("user", schema=None) as batch_op:
        batch_op.add_column(sa.Column("analytics_enabled", sa.Boolean(), server_default=sa.text("1"), nullable=False))


def downgrade() -> None:
    with op.batch_alter_table("user", schema=None) as batch_op:
        batch_op.drop_column("analytics_enabled")
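Assuming the project's standard Alembic setup (run from wherever `alembic.ini` lives, which is an assumption here), this revision is applied and rolled back like any other:

```bash
# Apply all pending migrations, including 3d35dcd30116
alembic upgrade head

# Roll back just this revision, returning to fa9cefccac47
alembic downgrade fa9cefccac47
```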
4 changes: 4 additions & 0 deletions backend/dataline/api/auth/router.py
@@ -4,6 +4,7 @@
import fastapi

from dataline.auth import validate_credentials
from dataline.utils.posthog import PosthogAnalytics

router = fastapi.APIRouter(
    prefix="/auth",
@@ -16,6 +17,9 @@
async def login(
    username: Annotated[str, fastapi.Body()], password: Annotated[str, fastapi.Body()], response: fastapi.Response
) -> fastapi.Response:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "user_logged_in")  # type: ignore[no-untyped-call]

    validate_credentials(username, password)
    ascii_encoded = f"{username}:{password}".encode("ascii")
    token = base64.b64encode(ascii_encoded).decode("utf-8")
16 changes: 16 additions & 0 deletions backend/dataline/api/connection/router.py
@@ -17,6 +17,7 @@
from dataline.old_models import SuccessListResponse, SuccessResponse
from dataline.repositories.base import AsyncSession, get_session
from dataline.services.connection import ConnectionService
from dataline.utils.posthog import PosthogAnalytics
from dataline.utils.utils import get_sqlite_dsn, is_valid_sqlite_file

logger = logging.getLogger(__name__)
@@ -30,6 +31,9 @@ async def connect_db(
    session: AsyncSession = Depends(get_session),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[ConnectionOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "connection_created", properties={"is_sample": False, "is_file": False})  # type: ignore[no-untyped-call]

    connection = await connection_service.create_connection(session, dsn=req.dsn, name=req.name, is_sample=False)
    return SuccessResponse(data=connection)

@@ -40,6 +44,9 @@ async def connect_sample_db(
    session: AsyncSession = Depends(get_session),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[ConnectionOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "connection_created", properties={"is_sample": True, "is_file": True})  # type: ignore[no-untyped-call]

    # Identify sample, copy file in, then create connection
    sample = DB_SAMPLES[req.sample_name.value]

@@ -60,6 +67,9 @@ async def connect_db_from_file(
    session: AsyncSession = Depends(get_session),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[ConnectionOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "connection_created", properties={"is_sample": False, "is_file": True})  # type: ignore[no-untyped-call]

    # Validate file type - currently only sqlite supported
    if type == FileConnectionType.sqlite:
        if not is_valid_sqlite_file(file):
@@ -134,6 +144,9 @@ async def update_connection(
    session: AsyncSession = Depends(get_session),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[GetConnectionOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "connection_updated")  # type: ignore[no-untyped-call]

    updated_connection = await connection_service.update_connection(session, connection_id, req)

    # TODO: Simplify output structure here and on FE
@@ -160,5 +173,8 @@ async def refresh_connection_schema(
    session: AsyncSession = Depends(get_session),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[ConnectionOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "connection_schema_refreshed")  # type: ignore[no-untyped-call]

    updated_connection = await connection_service.refresh_connection_schema(session, connection_id)
    return SuccessResponse(data=updated_connection)
12 changes: 12 additions & 0 deletions backend/dataline/api/conversation/router.py
@@ -20,6 +20,7 @@
from dataline.services.conversation import ConversationService
from dataline.services.llm_flow.toolkit import execute_sql_query
from dataline.services.llm_flow.utils import DatalineSQLDatabase as SQLDatabase
from dataline.utils.posthog import PosthogAnalytics
from dataline.utils.utils import generate_with_errors

logger = logging.getLogger(__name__)
@@ -54,6 +55,8 @@ async def get_conversation_messages(
    session: AsyncSession = Depends(get_session),
    conversation_service: ConversationService = Depends(),
) -> SuccessListResponse[MessageWithResultsOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "conversation_opened")  # type: ignore[no-untyped-call]
    conversation = await conversation_service.get_conversation_with_messages(session, conversation_id=conversation_id)
    messages = [MessageWithResultsOut.model_validate(message) for message in conversation.messages]
    return SuccessListResponse(data=messages)
@@ -65,6 +68,8 @@ async def create_conversation(
    session: AsyncSession = Depends(get_session),
    conversation_service: ConversationService = Depends(),
) -> SuccessResponse[ConversationOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "conversation_created")  # type: ignore[no-untyped-call]
    conversation = await conversation_service.create_conversation(
        session, connection_id=conversation_in.connection_id, name=conversation_in.name
    )
@@ -123,6 +128,13 @@ async def execute_sql(
    conversation_service: ConversationService = Depends(ConversationService),
    connection_service: ConnectionService = Depends(ConnectionService),
) -> SuccessResponse[ResultOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(
            user.id,
            "sql_executed",
            properties={"conversation_id": conversation_id},
        )  # type: ignore[no-untyped-call]

    # Get conversation
    # Will raise error that's auto captured by middleware if not exists
    conversation = await conversation_service.get_conversation(session, conversation_id=conversation_id)
7 changes: 7 additions & 0 deletions backend/dataline/api/result/router.py
@@ -7,6 +7,7 @@
from dataline.old_models import SuccessResponse
from dataline.repositories.base import AsyncSession, get_session
from dataline.services.result import ResultService
from dataline.utils.posthog import PosthogAnalytics

router = APIRouter(tags=["results"])

@@ -19,6 +20,9 @@ async def update_sql_query_result(
    session: AsyncSession = Depends(get_session),
    result_service: ResultService = Depends(ResultService),
) -> SuccessResponse[None | ChartRefreshOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "sql_updated")  # type: ignore[no-untyped-call]

    chart_out = await result_service.update_sql_query_result_content(
        session, result_id=result_id, sql=sql, for_chart=for_chart
    )
@@ -31,5 +35,8 @@ async def refresh_chart_result_data(
    session: AsyncSession = Depends(get_session),
    result_service: ResultService = Depends(ResultService),
) -> SuccessResponse[ChartRefreshOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "chart_refreshed")  # type: ignore[no-untyped-call]

    chart_data = await result_service.refresh_chart_result_data(session, chart_id=result_id)
    return SuccessResponse(data=chart_data)
4 changes: 4 additions & 0 deletions backend/dataline/api/settings/router.py
@@ -6,6 +6,7 @@
from dataline.old_models import SuccessResponse
from dataline.repositories.base import AsyncSession, get_session
from dataline.services.settings import SettingsService
from dataline.utils.posthog import PosthogAnalytics

router = APIRouter(prefix="/settings", tags=["settings"])

@@ -16,6 +17,9 @@ async def upload_avatar(
    settings_service: SettingsService = Depends(SettingsService),
    session: AsyncSession = Depends(get_session),
) -> SuccessResponse[AvatarOut]:
    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "avatar_uploaded")  # type: ignore[no-untyped-call]

    media = await settings_service.upload_avatar(session, file)
    blob_base64 = base64.b64encode(media.blob).decode("utf-8")
    return SuccessResponse(data=AvatarOut(blob=blob_base64))
8 changes: 6 additions & 2 deletions backend/dataline/main.py
@@ -18,6 +18,7 @@
from dataline.config import IS_BUNDLED, config
from dataline.old_models import SuccessResponse
from dataline.sentry import maybe_init_sentry
from dataline.utils.posthog import PosthogAnalytics

logging.basicConfig(level=logging.INFO)

@@ -55,11 +56,14 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
        webbrowser.open("http://localhost:7377", new=2)

    await maybe_init_sentry()

    async with PosthogAnalytics() as (ph, user):
        ph.capture(user.id, "dataline_started")

    yield
    # On shutdown


app = App(lifespan=lifespan)
app = App(lifespan=lifespan) # type: ignore


@app.get("/healthcheck", response_model_exclude_none=True)
5 changes: 3 additions & 2 deletions backend/dataline/models/user/model.py
@@ -1,6 +1,6 @@
from sqlalchemy import String, Boolean
from sqlalchemy.sql import true
from sqlalchemy import Boolean, String
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.sql import true

from dataline.models.base import DBModel, UUIDMixin

@@ -12,4 +12,5 @@ class UserModel(DBModel, UUIDMixin, kw_only=True):
    preferred_openai_model: Mapped[str | None] = mapped_column("preferred_openai_model", String, nullable=True)
    langsmith_api_key: Mapped[str | None] = mapped_column("langsmith_api_key", String, nullable=True)
    sentry_enabled: Mapped[bool] = mapped_column("sentry_enabled", Boolean, server_default=true())
    analytics_enabled: Mapped[bool] = mapped_column("analytics_enabled", Boolean, server_default=true())
    openai_base_url: Mapped[str | None] = mapped_column("openai_base_url", String, nullable=True)
4 changes: 3 additions & 1 deletion backend/dataline/models/user/schema.py
@@ -3,7 +3,6 @@

from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_serializer


logger = logging.getLogger(__name__)


@@ -14,6 +13,7 @@ class UserUpdateIn(BaseModel):
    langsmith_api_key: Optional[SecretStr] = Field(None, min_length=4)
    preferred_openai_model: Optional[str] = None
    sentry_enabled: Optional[bool] = None
    analytics_enabled: Optional[bool] = None

    @field_serializer("openai_api_key")
    def dump_openai_api_key(self, v: SecretStr) -> str:
@@ -33,6 +33,7 @@ class UserOut(BaseModel):
    langsmith_api_key: Optional[SecretStr] = None
    preferred_openai_model: Optional[str] = None
    sentry_enabled: bool
    analytics_enabled: Optional[bool] = None


class UserWithKeys(BaseModel):
@@ -45,6 +46,7 @@ class UserWithKeys(BaseModel):
    langsmith_api_key: SecretStr | None = None
    preferred_openai_model: str
    sentry_enabled: bool
    analytics_enabled: Optional[bool] = None


class AvatarOut(BaseModel):
2 changes: 2 additions & 0 deletions backend/dataline/repositories/user.py
@@ -16,10 +16,12 @@ class UserCreate(BaseModel):
    langsmith_api_key: Optional[str] = None
    preferred_openai_model: Optional[str] = None
    sentry_enabled: Optional[bool] = True
    analytics_enabled: Optional[bool] = True


class UserUpdate(UserCreate):
    sentry_enabled: Optional[bool] = None
    analytics_enabled: Optional[bool] = None


class UserRepository(BaseRepository[UserModel, UserCreate, UserUpdate]):
8 changes: 8 additions & 0 deletions backend/dataline/services/conversation.py
@@ -42,6 +42,7 @@
    ConversationTitleGenerator,
)
from dataline.services.settings import SettingsService
from dataline.utils.posthog import PosthogAnalytics
from dataline.utils.utils import stream_event_str

logger = logging.getLogger(__name__)
@@ -135,6 +136,13 @@ async def query(
        query: str,
        secure_data: bool = True,
    ) -> AsyncGenerator[str, None]:
        async with PosthogAnalytics() as (ph, user):
            ph.capture(
                user.id,
                "message_sent",
                properties={"conversation_id": conversation_id, "is_secure": secure_data},
            )  # type: ignore[no-untyped-call]

        # Get conversation, connection, user settings
        conversation = await self.get_conversation(session, conversation_id=conversation_id)
        connection = await self.connection_service.get_connection(session, connection_id=conversation.connection_id)
48 changes: 48 additions & 0 deletions backend/dataline/utils/posthog.py
@@ -0,0 +1,48 @@
import logging
from types import TracebackType
from typing import cast

from posthog import Posthog
from posthog.client import Client as PosthogClient

from dataline.config import EnvironmentType, config
from dataline.models.user.model import UserModel
from dataline.repositories.base import SessionCreator
from dataline.repositories.user import UserRepository

logger = logging.getLogger(__name__)
posthog: PosthogClient = Posthog(  # type: ignore[no-untyped-call]
    project_api_key="phc_bcTdZnbv2IDSiMOAq3UdnHwfJlZvTF0e5ctPMJUzw0i",
    host="https://eu.i.posthog.com",
    timeout=1,  # if it takes more than 1 second, drop it; not worth making the user wait
)

if config.environment == EnvironmentType.development:
    posthog.debug = True


class PosthogAnalytics:
    """
    Context manager for using Posthog analytics safely, with a single point of control.
    If the user has disabled analytics, the client is disabled and captures become no-ops.
    """

    async def __aenter__(self) -> tuple[PosthogClient, UserModel]:
        async with SessionCreator.begin() as session:
            user_repo = UserRepository()
            user_info = await user_repo.get_one_or_none(session)
            is_enabled = (
                user_info is not None
                and user_info.analytics_enabled
                and config.environment == EnvironmentType.production  # disable in dev mode
            )

            posthog.disabled = not is_enabled

            user_info = cast(UserModel, user_info)
            return posthog, user_info

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        pass
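Every capture site in this PR then follows the same short pattern; a generic sketch (the event name and properties below are placeholders):

```python
# Generic usage of the context manager above; "some_event" is a placeholder.
async with PosthogAnalytics() as (ph, user):
    ph.capture(user.id, "some_event", properties={"key": "value"})  # type: ignore[no-untyped-call]
```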