generated from deepgram/oss-repo-template
-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #251 from deepgram/sr/speak-endpoint
feat: speak endpoint added
- Loading branch information
Showing
6 changed files
with
157 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
const { createClient } = require("../../dist/main/index"); | ||
const fs = require("fs"); | ||
|
||
const deepgram = createClient(process.env.DEEPGRAM_API_KEY); | ||
|
||
const text = "Hello, how can I help you today?"; | ||
|
||
/**
 * Requests speech audio for `text` from the speak endpoint, writes the
 * returned bytes to "audio.wav", and logs the response headers.
 *
 * Failures to obtain a stream or to write the file are reported on stderr;
 * the function itself resolves with no value.
 */
const getAudio = async () => {
  const speakResult = await deepgram.speak.request({ text }, { model: "aura-asteria-en" });
  const audioStream = await speakResult.getStream();
  const responseHeaders = await speakResult.getHeaders();

  if (!audioStream) {
    // No body came back, so there is nothing to write.
    console.error("Error generating audio:", audioStream);
  } else {
    const audioBytes = await getAudioBuffer(audioStream);

    // Callback-style write: the result is reported once the write settles.
    fs.writeFile("audio.wav", audioBytes, (writeError) => {
      if (writeError) {
        console.error("Error writing audio to file:", writeError);
        return;
      }
      console.log("Audio file written to audio.wav");
    });
  }

  if (responseHeaders) {
    console.log("Headers:", responseHeaders);
  }
};
|
||
/**
 * Drains a readable stream of byte chunks and returns all bytes as one Buffer.
 *
 * @param response - stream exposing `getReader()`, whose reader yields
 *   Uint8Array chunks until it reports `done`.
 * @returns a Buffer holding every chunk's bytes in the order they were read;
 *   an empty Buffer when the stream yields no chunks.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Always give the stream back, even if a read rejects.
    reader.releaseLock();
  }

  // One allocation and one copy per chunk, instead of rebuilding the whole
  // accumulated array for every chunk.
  return Buffer.concat(chunks);
};
|
||
// Run the example. Any rejection (failed request, stream read error) is
// reported and turned into a failing exit code instead of being left as an
// unhandled promise rejection.
getAudio().catch((error) => {
  console.error("Error running speak example:", error);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/**
 * Options accepted by a speak (text-to-speech) request.
 *
 * Every named field is optional, and any additional string key is allowed
 * with a value of type `unknown`.
 */
export interface SpeakSchema extends Record<string, unknown> {
  /** Any option not named explicitly below. */
  [key: string]: unknown;

  /**
   * The model, voice, language, and version of the voice.
   * Follows the format of [modelname]-[voicename]-[language]-[version].
   */
  model?: string;

  /** Encoding options for the output audio. Default is 'mp3'. */
  encoding?:
    | "linear16"
    | "mulaw"
    | "alaw"
    | "mp3"
    | "opus"
    | "flac"
    | "aac";

  /** File format wrapper for the audio. */
  container?: string;

  /** Sample rate of the audio output. */
  sample_rate?: number;

  /** Bit rate of the audio output. */
  bit_rate?: number;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { AbstractRestfulClient } from "./AbstractRestfulClient"; | ||
import { DeepgramError, DeepgramUnknownError, isDeepgramError } from "../lib/errors"; | ||
import { appendSearchParams, isTextSource } from "../lib/helpers"; | ||
import { Fetch, SpeakSchema, TextSource } from "../lib/types"; | ||
|
||
/**
 * REST client for the speak (text-to-speech) endpoint.
 *
 * `request` stores the raw `Response` on the instance; `getStream` and
 * `getHeaders` then expose that response's body and headers.
 */
export class SpeakClient extends AbstractRestfulClient {
  /** Response stored by the most recent `request` call that resolved; `undefined` before that. */
  public result: undefined | Response;

  /**
   * Sends `source` as a JSON body in a POST to the speak endpoint and keeps
   * the raw response for later retrieval.
   *
   * @param source - the text source to synthesize; must satisfy `isTextSource`.
   * @param options - request options appended to the URL as search
   *   parameters. `model` defaults to "aura-asteria-en" when not supplied.
   * @param endpoint - path resolved against the client's base URL.
   * @returns this client, so `getStream` / `getHeaders` can be called on the result.
   * @throws DeepgramError when `source` is not a text source. Errors raised
   *   while performing the request propagate unchanged.
   * @see https://developers.deepgram.com/reference/text-to-speech-api
   */
  async request(
    source: TextSource,
    options?: SpeakSchema,
    endpoint = "v1/speak"
  ): Promise<SpeakClient> {
    if (!isTextSource(source)) {
      throw new DeepgramError("Unknown speak source type");
    }

    const body = JSON.stringify(source);

    // Caller-supplied options win over the default model.
    const speakOptions: SpeakSchema = { model: "aura-asteria-en", ...options };

    const url = new URL(endpoint, this.baseUrl);
    appendSearchParams(url.searchParams, speakOptions);

    this.result = await this._handleRawRequest(this.fetch as Fetch, "POST", url, {}, {}, body);
    return this;
  }

  /**
   * Returns the body stream of the stored response.
   *
   * @returns the response body, or `null` when the response has no body.
   * @throws DeepgramUnknownError when no response has been stored yet.
   */
  async getStream(): Promise<ReadableStream<Uint8Array> | null> {
    if (!this.result) {
      throw new DeepgramUnknownError("Tried to get stream before making request", "");
    }

    return this.result.body;
  }

  /**
   * Returns the headers of the stored response.
   *
   * @throws DeepgramUnknownError when no response has been stored yet.
   */
  async getHeaders(): Promise<Headers> {
    if (!this.result) {
      throw new DeepgramUnknownError("Tried to get headers before making request", "");
    }

    return this.result.headers;
  }
}