Merge pull request #251 from deepgram/sr/speak-endpoint

feat: speak endpoint added
deepgram · Mar 11, 2024 · 279ad76 · 279ad76
2 parents 0212338 + 8948856
commit 279ad76
Show file tree

Hide file tree

Showing 6 changed files with 157 additions and 0 deletions.
diff --git a/examples/node-speak/index.js b/examples/node-speak/index.js
@@ -0,0 +1,51 @@
+const { createClient } = require("../../dist/main/index");
+const fs = require("fs");
+
+// SDK client; the API key is read from the DEEPGRAM_API_KEY environment variable.
+const deepgram = createClient(process.env.DEEPGRAM_API_KEY);
+
+// Text that is sent to the speak endpoint below.
+const text = "Hello, how can I help you today?";
+
+const getAudio = async () => {
+  const response = await deepgram.speak.request({ text }, { model: "aura-asteria-en" });
+  const stream = await response.getStream();
+  const headers = await response.getHeaders();
+  if (stream) {
+    const buffer = await getAudioBuffer(stream);
+
+    fs.writeFile("audio.wav", buffer, (err) => {
+      if (err) {
+        console.error("Error writing audio to file:", err);
+      } else {
+        console.log("Audio file written to audio.wav");
+      }
+    });
+  } else {
+    console.error("Error generating audio:", stream);
+  }
+
+  if (headers) {
+    console.log("Headers:", headers);
+  }
+};
+
+// helper function to convert stream to audio buffer
+const getAudioBuffer = async (response) => {
+  const reader = response.getReader();
+  const chunks = [];
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    chunks.push(value);
+  }
+
+  const dataArray = chunks.reduce(
+    (acc, chunk) => Uint8Array.from([...acc, ...chunk]),
+    new Uint8Array(0)
+  );
+
+  return Buffer.from(dataArray.buffer);
+};
+
+getAudio();
diff --git a/src/DeepgramClient.ts b/src/DeepgramClient.ts
@@ -4,6 +4,7 @@ import { ListenClient } from "./packages/ListenClient";
 import { ManageClient } from "./packages/ManageClient";
 import { OnPremClient } from "./packages/OnPremClient";
 import { ReadClient } from "./packages/ReadClient";
+import { SpeakClient } from "./packages/SpeakClient";
 
 /**
  * Deepgram Client.
@@ -28,6 +29,10 @@ export default class DeepgramClient extends AbstractClient {
     return new ReadClient(this.key, this.options);
   }
 
+  /**
+   * Text-to-speech client built from this client's key and options.
+   *
+   * A new `SpeakClient` is constructed on every access.
+   */
+  get speak(): SpeakClient {
+    const { key, options } = this;
+
+    return new SpeakClient(key, options);
+  }
+
   /**
    * Major version fallback errors are below
    *

diff --git a/src/lib/types/SpeakSchema.ts b/src/lib/types/SpeakSchema.ts
@@ -0,0 +1,29 @@
+/**
+ * Options accepted by a speak (text-to-speech) request. All fields are
+ * optional, and arbitrary additional keys are permitted.
+ */
+export interface SpeakSchema extends Record<string, unknown> {
+  /**
+   * The model, voice, language, and version of the voice.
+   * Follows the format of [modelname]-[voicename]-[language]-[version].
+   */
+  model?: string;
+
+  /**
+   * Encoding options for the output audio. Default is 'mp3'.
+   */
+  encoding?: "linear16" | "mulaw" | "alaw" | "mp3" | "opus" | "flac" | "aac";
+
+  /**
+   * File format wrapper for the audio.
+   */
+  container?: string;
+
+  /**
+   * Sample rate of the audio output.
+   */
+  sample_rate?: number;
+
+  /**
+   * Bit rate of the audio output.
+   */
+  bit_rate?: number;
+
+  /**
+   * Any other option key not listed above.
+   */
+  [key: string]: unknown;
+}
diff --git a/src/lib/types/index.ts b/src/lib/types/index.ts
@@ -42,6 +42,7 @@ export type {
   AnalyzeSource,
 } from "./DeepgramSource";
 export type { SendProjectInviteSchema } from "./SendProjectInviteSchema";
+export type { SpeakSchema } from "./SpeakSchema";
 export type { SpeechStartedEvent } from "./SpeechStartedEvent";
 export type { SyncPrerecordedResponse } from "./SyncPrerecordedResponse";
 export type { SyncAnalyzeResponse } from "./SyncAnalyzeResponse";

diff --git a/src/packages/AbstractRestfulClient.ts b/src/packages/AbstractRestfulClient.ts
@@ -76,13 +76,34 @@ export abstract class AbstractRestfulClient extends AbstractClient {
       fetcher(url, this._getRequestParams(method, headers, parameters, body))
         .then((result) => {
           if (!result.ok) throw result;
+
           return result.json();
         })
         .then((data) => resolve(data))
         .catch((error) => this.handleError(error, reject));
     });
   }
 
+  /**
+   * Performs a request and resolves with the fetch result itself, without
+   * parsing its body.
+   *
+   * A result whose `ok` flag is false is thrown and, like any other failure in
+   * the chain, passed to `handleError` together with this promise's `reject`.
+   *
+   * @param fetcher - fetch implementation used to send the request.
+   * @param method - HTTP method forwarded to `_getRequestParams`.
+   * @param url - target of the request.
+   * @param headers - optional headers forwarded to `_getRequestParams`.
+   * @param parameters - optional fetch parameters forwarded to `_getRequestParams`.
+   * @param body - optional request payload.
+   * @returns the unparsed fetch result when `ok` is true.
+   */
+  protected async _handleRawRequest(
+    fetcher: Fetch,
+    method: RequestMethodType,
+    url: string | URL,
+    headers?: Record<string, string>,
+    parameters?: FetchParameters,
+    body?: string | Buffer | Readable
+  ): Promise<any> {
+    return new Promise((resolve, reject) => {
+      const requestParams = this._getRequestParams(method, headers, parameters, body);
+
+      fetcher(url, requestParams)
+        .then((response) => {
+          if (response.ok) {
+            return response;
+          }
+
+          throw response;
+        })
+        .then(resolve)
+        .catch((failure) => this.handleError(failure, reject));
+    });
+  }
+
   protected async get(
     fetcher: Fetch,
     url: string | URL,

diff --git a/src/packages/SpeakClient.ts b/src/packages/SpeakClient.ts
@@ -0,0 +1,50 @@
+import { AbstractRestfulClient } from "./AbstractRestfulClient";
+import { DeepgramError, DeepgramUnknownError, isDeepgramError } from "../lib/errors";
+import { appendSearchParams, isTextSource } from "../lib/helpers";
+import { Fetch, SpeakSchema, TextSource } from "../lib/types";
+
+/**
+ * Client for the text-to-speech ("speak") REST endpoint.
+ *
+ * `request()` stores the raw response on the instance; `getStream()` and
+ * `getHeaders()` then expose that response's body and headers.
+ */
+export class SpeakClient extends AbstractRestfulClient {
+  /** Raw response of the most recent successful `request()`; `undefined` until then. */
+  public result: undefined | Response;
+
+  /**
+   * POSTs `source` as a JSON body to the speak endpoint and keeps the raw
+   * response for later retrieval.
+   *
+   * @param source - the text payload to synthesize.
+   * @param options - options appended to the URL as search parameters; `model`
+   *   falls back to `"aura-asteria-en"` when not supplied.
+   * @param endpoint - path resolved against the client's base URL.
+   * @returns this client, so `getStream()` / `getHeaders()` can be chained.
+   * @throws DeepgramError when `source` is not a text source.
+   * @see https://developers.deepgram.com/reference/text-to-speech-api
+   */
+  async request(
+    source: TextSource,
+    options?: SpeakSchema,
+    endpoint = "v1/speak"
+  ): Promise<SpeakClient> {
+    if (!isTextSource(source)) {
+      throw new DeepgramError("Unknown speak source type");
+    }
+
+    const body = JSON.stringify(source);
+
+    // Caller-supplied options win over the default model.
+    const speakOptions: SpeakSchema = { model: "aura-asteria-en", ...options };
+
+    const url = new URL(endpoint, this.baseUrl);
+    appendSearchParams(url.searchParams, speakOptions);
+
+    this.result = await this._handleRawRequest(this.fetch as Fetch, "POST", url, {}, {}, body);
+    return this;
+  }
+
+  /**
+   * Body of the stored response.
+   *
+   * @returns the response's body stream, or `null` when the response has none.
+   * @throws DeepgramUnknownError when called before a successful `request()`.
+   */
+  async getStream(): Promise<ReadableStream<Uint8Array> | null> {
+    if (!this.result)
+      throw new DeepgramUnknownError("Tried to get stream before making request", "");
+
+    return this.result.body;
+  }
+
+  /**
+   * Headers of the stored response.
+   *
+   * @returns the response's headers.
+   * @throws DeepgramUnknownError when called before a successful `request()`.
+   */
+  async getHeaders(): Promise<Headers> {
+    if (!this.result)
+      throw new DeepgramUnknownError("Tried to get headers before making request", "");
+
+    return this.result.headers;
+  }
+}