Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ea1b6b4

Browse files
authored Mar 20, 2025
feat: add audio helpers
1 parent 142933a commit ea1b6b4

File tree

3 files changed

+187
-0
lines changed

3 files changed

+187
-0
lines changed
 

‎examples/speech-to-text.ts

+19
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
import OpenAI from 'openai';
2+
import { recordAudio } from 'openai/helpers/audio';
3+
4+
// Shared API client for this example; constructed with no explicit options.
const openai = new OpenAI();
5+
6+
/**
 * Records five seconds of audio with `recordAudio` and prints its transcription.
 *
 * The recording uses the helper's default input device. To pick a specific
 * input, pass `device: <index>` to `recordAudio`; valid indices depend on the
 * machine, which is why none is hard-coded here.
 *
 * @returns A promise that resolves after the transcription text has been logged.
 */
async function main(): Promise<void> {
  console.log('Recording for 5 seconds...');
  const audioFile = await recordAudio({ timeout: 5000 });

  console.log('Transcribing...');
  const transcription = await openai.audio.transcriptions.create({
    file: audioFile,
    model: 'whisper-1',
  });

  console.log(transcription.text);
}
18+
19+
// Run the example; a rejection from main() is passed to console.error.
main().catch(console.error);

‎examples/text-to-speech.ts

+23
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
import OpenAI from 'openai';
2+
import { playAudio } from 'openai/helpers/audio';
3+
4+
// Shared API client for this example; constructed with no explicit options.
const openai = new OpenAI();
5+
6+
// Sample text sent to the speech endpoint: four lines joined by newlines,
// with no leading or trailing whitespace.
const exampleText = [
  'I see skies of blue and clouds of white',
  'The bright blessed days, the dark sacred nights',
  'And I think to myself',
  'What a wonderful world',
].join('\n');
12+
13+
/**
 * Synthesizes `exampleText` with the `tts-1` model and the `nova` voice, then
 * plays the returned audio through `playAudio`.
 *
 * @returns A promise that settles when `playAudio` settles.
 */
async function main(): Promise<void> {
  const speechRequest = {
    model: 'tts-1',
    voice: 'nova',
    input: exampleText,
  } as const;

  const speech = await openai.audio.speech.create(speechRequest);
  await playAudio(speech);
}
22+
23+
// Run the example; a rejection from main() is passed to console.error.
main().catch(console.error);

‎src/helpers/audio.ts

+145
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,145 @@
1+
import { File } from 'formdata-node';
2+
import { spawn } from 'node:child_process';
3+
import { Readable } from 'node:stream';
4+
import { platform, versions } from 'node:process';
5+
import { Response } from 'openai/_shims';
6+
7+
/** Sample rate handed to ffmpeg's `-ar` option when recording. */
const DEFAULT_SAMPLE_RATE = 24000;

/** Channel count handed to ffmpeg's `-ac` option when recording. */
const DEFAULT_CHANNELS = 1;

/** True when `process.versions.node` is present, i.e. the code runs under Node.js. */
const isNode = !!versions?.node;
11+
12+
/**
 * ffmpeg input format (the value given to `-f` before `-i`) used for
 * recording on each Node.js platform identifier.
 */
const recordingProviders: Record<NodeJS.Platform, string> = {
  // macOS
  darwin: 'avfoundation',

  // Windows and Windows-hosted environments
  win32: 'dshow',
  cygwin: 'dshow',

  // Every other platform is mapped to ALSA
  linux: 'alsa',
  android: 'alsa',
  freebsd: 'alsa',
  netbsd: 'alsa',
  openbsd: 'alsa',
  sunos: 'alsa',
  aix: 'alsa',
  haiku: 'alsa',
};
25+
26+
/**
 * Type guard: treats the input as a `Response` when it exposes a `body`
 * property whose value is anything other than `undefined` (including `null`).
 */
function isResponse(stream: NodeJS.ReadableStream | Response | File): stream is Response {
  const candidate = stream as { body?: unknown };
  return candidate.body !== undefined;
}
29+
30+
/**
 * Type guard: reports whether the input passes an `instanceof File` check.
 */
function isFile(stream: NodeJS.ReadableStream | Response | File): stream is File {
  const matchesFile = stream instanceof File;
  return matchesFile;
}
33+
34+
/**
 * Plays audio by piping it into the stdin of a locally installed `ffplay`
 * process (`ffplay -autoexit -nodisp -i pipe:0`).
 *
 * @param stream - A Node.js readable stream, a fetch `Response`, or a `File`
 *   containing the audio to play.
 * @returns A promise that resolves once `ffplay` exits with code 0.
 * @throws Rejects when the response has no body, when `ffplay` cannot be
 *   started (for example it is not installed), when the source stream errors,
 *   or when `ffplay` exits with a non-zero (or signal-terminated) status.
 */
async function nodejsPlayAudio(stream: NodeJS.ReadableStream | Response | File): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    try {
      // Resolve the input to a Node.js readable BEFORE spawning, so an
      // unusable input never leaves an orphaned ffplay process behind.
      let source: NodeJS.ReadableStream;
      if (isResponse(stream)) {
        const body: unknown = stream.body;
        if (body == null) {
          throw new Error('Cannot play audio: the response has no body');
        }
        // Node-style bodies expose `pipe`; anything else (e.g. a web
        // ReadableStream) is adapted the same way the File branch does it.
        source =
          typeof (body as { pipe?: unknown }).pipe === 'function' ?
            (body as NodeJS.ReadableStream)
          : Readable.from(body as AsyncIterable<Uint8Array>);
      } else if (isFile(stream)) {
        source = Readable.from(stream.stream());
      } else {
        source = stream;
      }

      const ffplay = spawn('ffplay', ['-autoexit', '-nodisp', '-i', 'pipe:0']);

      // Spawn failures are reported asynchronously through the 'error' event,
      // not as a synchronous throw, so the surrounding try/catch cannot see them.
      ffplay.on('error', reject);

      // If ffplay exits before consuming all input, writes to stdin fail
      // (EPIPE). The exit status reported by 'close' is the meaningful signal,
      // so the write error itself is intentionally ignored.
      ffplay.stdin.on('error', () => {});

      ffplay.on('close', (code: number | null) => {
        if (code !== 0) {
          reject(new Error(`ffplay process exited with code ${code}`));
          return;
        }
        resolve();
      });

      // `pipe` does not forward source errors; without this the player would
      // wait forever on a stdin that never ends.
      source.on('error', (error: Error) => {
        ffplay.kill();
        reject(error);
      });

      source.pipe(ffplay.stdin);
    } catch (error) {
      reject(error);
    }
  });
}
58+
59+
/**
 * Plays the given audio. Only supported under Node.js, where playback is
 * delegated to `nodejsPlayAudio`.
 *
 * @param input - A Node.js readable stream, a fetch `Response`, or a `File`.
 * @returns A promise that settles with the result of `nodejsPlayAudio`.
 * @throws Rejects with an `Error` when not running under Node.js.
 */
export async function playAudio(input: NodeJS.ReadableStream | Response | File): Promise<void> {
  if (!isNode) {
    throw new Error(
      'Play audio is not supported in the browser yet. Check out https://npm.im/wavtools as an alternative.',
    );
  }

  return nodejsPlayAudio(input);
}
68+
69+
/** Options accepted by `recordAudio`. All fields are optional. */
type RecordAudioOptions = {
  /** Recording duration in milliseconds; only positive numbers take effect. */
  timeout?: number;
  /** Numeric index of the audio input device; index 0 is used when omitted. */
  device?: number;
  /** Aborting this signal stops the recording by terminating ffmpeg. */
  signal?: AbortSignal;
};
74+
75+
/**
 * Records audio from an input device by running a locally installed `ffmpeg`
 * and collecting the WAV data it writes to stdout.
 *
 * Recording continues until `timeout` elapses or `signal` is aborted, at which
 * point ffmpeg is sent SIGTERM and the captured data is returned. If neither
 * is supplied, the promise settles only when ffmpeg exits on its own.
 *
 * @param options.signal - Aborting it stops the recording. A signal that is
 *   already aborted stops the recording immediately.
 * @param options.device - Numeric input device index; defaults to 0.
 * @param options.timeout - Recording duration in milliseconds; ignored unless
 *   it is a positive number.
 * @returns A promise for a `File` named `audio.wav` (type `audio/wav`).
 * @throws Rejects when ffmpeg cannot be started, or when it exits with a
 *   non-zero status that was not caused by a requested stop; the error message
 *   includes the tail of ffmpeg's stderr output.
 */
function nodejsRecordAudio({ signal, device, timeout }: RecordAudioOptions = {}): Promise<File> {
  return new Promise<File>((resolve, reject) => {
    // Upper bound on retained stderr text, so long recordings stay bounded.
    const STDERR_TAIL_LIMIT = 4096;

    const chunks: Buffer[] = [];
    const provider = recordingProviders[platform];
    let stderrTail = '';
    let stopRequested = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    try {
      const ffmpeg = spawn(
        'ffmpeg',
        [
          '-f',
          provider,
          '-i',
          // NOTE(review): the `:<index>` form matches avfoundation's audio
          // device syntax; alsa and dshow appear to expect different device
          // specifiers — confirm against the ffmpeg devices documentation.
          `:${device ?? 0}`,
          '-ar',
          DEFAULT_SAMPLE_RATE.toString(),
          '-ac',
          DEFAULT_CHANNELS.toString(),
          '-f',
          'wav',
          'pipe:1',
        ],
        {
          stdio: ['ignore', 'pipe', 'pipe'],
        },
      );

      // Ends the recording on purpose; the flag lets the 'close' handler tell
      // a requested stop apart from an ffmpeg failure.
      const stop = (): void => {
        stopRequested = true;
        ffmpeg.kill('SIGTERM');
      };

      // Releases the timer and the abort listener once the process is gone.
      const cleanup = (): void => {
        if (timer !== undefined) {
          clearTimeout(timer);
          timer = undefined;
        }
        signal?.removeEventListener('abort', stop);
      };

      ffmpeg.stdout.on('data', (chunk: Buffer) => {
        chunks.push(chunk);
      });

      // stderr is a pipe, so it must be drained: ffmpeg logs there
      // continuously and would block once the pipe buffer fills up.
      ffmpeg.stderr.on('data', (chunk: Buffer) => {
        stderrTail = (stderrTail + chunk.toString()).slice(-STDERR_TAIL_LIMIT);
      });

      // Spawn failures (e.g. ffmpeg not installed) arrive here, not as a throw.
      ffmpeg.on('error', (error) => {
        cleanup();
        reject(error);
      });

      ffmpeg.on('close', (code) => {
        cleanup();

        // A non-zero status after a requested stop is expected; without a
        // requested stop it means ffmpeg failed (bad device, missing input...).
        if (!stopRequested && code !== 0) {
          reject(new Error(`ffmpeg process exited with code ${code}: ${stderrTail.trim()}`));
          return;
        }

        const audioBuffer = Buffer.concat(chunks);
        resolve(new File([audioBuffer], 'audio.wav', { type: 'audio/wav' }));
      });

      if (typeof timeout === 'number' && timeout > 0) {
        timer = setTimeout(stop, timeout);
      }

      if (signal) {
        if (signal.aborted) {
          // 'abort' never fires for a signal that is already aborted.
          stop();
        } else {
          signal.addEventListener('abort', stop, { once: true });
        }
      }
    } catch (error) {
      reject(error);
    }
  });
}
136+
137+
/**
 * Records audio from an input device. Only supported under Node.js, where the
 * work is delegated to `nodejsRecordAudio`.
 *
 * @param options - Optional `signal`, `device` and `timeout` settings.
 * @returns A promise for the result of `nodejsRecordAudio`.
 * @throws Rejects with an `Error` when not running under Node.js.
 */
export async function recordAudio(options: RecordAudioOptions = {}) {
  if (!isNode) {
    throw new Error(
      'Record audio is not supported in the browser. Check out https://npm.im/wavtools as an alternative.',
    );
  }

  return nodejsRecordAudio(options);
}

0 commit comments

Comments
 (0)
Please sign in to comment.