Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deepgram mute #408

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

### Changed

- `DeepgramSTTService` now has a mute-during-speech functionality. When the bot is
  speaking, the user's audio is not transcribed and is thereby essentially muted.

### Fixed

### Other

## [0.0.40] - 2024-08-20

### Added

- VAD parameters can now be dynamically updated using the
  `VADParamsUpdateFrame`.

Expand Down Expand Up @@ -897,4 +910,4 @@ a bit.

## [0.0.2] - 2024-03-12

Initial public release.
Initial public release.
33 changes: 25 additions & 8 deletions src/pipecat/services/deepgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,37 @@
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp

from typing import AsyncGenerator

import aiohttp
from loguru import logger

from pipecat.frames.frames import (
AudioRawFrame,
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
CancelFrame,
EndFrame,
ErrorFrame,
Frame,
InterimTranscriptionFrame,
StartFrame,
SystemFrame,
TranscriptionFrame,
TTSStartedFrame,
TTSStoppedFrame,
TranscriptionFrame)
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import AsyncAIService, TTSService
from pipecat.utils.time import time_now_iso8601

from loguru import logger


# See .env.example for Deepgram configuration needed
try:
from deepgram import (
DeepgramClient,
DeepgramClientOptions,
LiveTranscriptionEvents,
LiveOptions,
LiveTranscriptionEvents,
)
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
Expand Down Expand Up @@ -121,6 +122,7 @@ def __init__(self,
interim_results=True,
smart_format=True,
),
mute_during_speech=False,
**kwargs):
super().__init__(**kwargs)

Expand All @@ -130,14 +132,28 @@ def __init__(self,
api_key, config=DeepgramClientOptions(url=url, options={"keepalive": "true"}))
self._connection = self._client.listen.asynclive.v("1")
self._connection.on(LiveTranscriptionEvents.Transcript, self._on_message)
self.mute_during_speech = mute_during_speech
self.bot_speaking = False

async def process_frame(self, frame: Frame, direction: FrameDirection):
    """Process an incoming frame.

    Tracks bot speaking state from BotStartedSpeakingFrame /
    BotStoppedSpeakingFrame, forwards system frames downstream, streams raw
    audio to the Deepgram connection (unless muted while the bot is
    speaking), and queues everything else.

    Args:
        frame: The frame to process.
        direction: The direction the frame is traveling in the pipeline.
    """
    await super().process_frame(frame, direction)

    # Keep track of whether the bot is currently speaking so that, when
    # mute_during_speech is enabled, we can avoid transcribing the bot's
    # own audio (echo) as user speech.
    if isinstance(frame, BotStartedSpeakingFrame):
        self.bot_speaking = True
    elif isinstance(frame, BotStoppedSpeakingFrame):
        self.bot_speaking = False

    if isinstance(frame, SystemFrame):
        await self.push_frame(frame, direction)
    elif isinstance(frame, AudioRawFrame):
        # Only stream audio to Deepgram when we are not muting, or the
        # bot is not speaking; otherwise the audio is silently dropped,
        # effectively muting the user while the bot talks.
        if not (self.mute_during_speech and self.bot_speaking):
            await self._connection.send(frame.audio)
        else:
            logger.trace("Muting user audio while bot is speaking")
    else:
        await self.queue_frame(frame, direction)

Expand All @@ -157,6 +173,7 @@ async def cancel(self, frame: CancelFrame):
await self._connection.finish()

async def _on_message(self, *args, **kwargs):
# print(f"ON MESSAGE: {args}, {kwargs}")
result = kwargs["result"]
is_final = result.is_final
transcript = result.channel.alternatives[0].transcript
Expand Down
Loading