import React, { useState, useEffect, useRef, useCallback } from 'react';
import { LiveServerMessage, LiveSession, Blob } from '@google/genai';
import { connectLive } from '../services/geminiService';
import { decode, encode, decodeAudioData } from '../utils';
import { Spinner } from '../components/Spinner';
type ConnectionState = 'disconnected' | 'connecting' | 'connected' | 'error';
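// Real-time voice chat against the Gemini Live API: streams 16 kHz mic PCM up,
// plays the model's 24 kHz PCM replies back, and shows both transcriptions.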
const LiveConversationModule: React.FC = () => {
  const [connectionState, setConnectionState] = useState<ConnectionState>('disconnected');
  const [userTranscription, setUserTranscription] = useState('');
  const [modelTranscription, setModelTranscription] = useState('');
  const [history, setHistory] = useState<{ user: string; model: string }[]>([]);
  const sessionRef = useRef<LiveSession | null>(null);
  const inputAudioContextRef = useRef<AudioContext | null>(null);
  const outputAudioContextRef = useRef<AudioContext | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);
  const mediaStreamSourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const nextStartTimeRef = useRef<number>(0);
  const audioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
  const sessionPromiseRef = useRef<Promise<LiveSession> | null>(null);
  // Mirror the in-progress transcriptions in refs so the long-lived onmessage
  // callback reads current values instead of a stale closure over state.
  const userTranscriptionRef = useRef('');
  const modelTranscriptionRef = useRef('');
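  // Tear down the session and the whole audio graph; safe to call repeatedly.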
  const cleanup = useCallback(() => {
    if (sessionRef.current) {
      sessionRef.current.close();
      sessionRef.current = null;
    }
    if (scriptProcessorRef.current) {
      scriptProcessorRef.current.disconnect();
      scriptProcessorRef.current = null;
    }
    if (mediaStreamSourceRef.current) {
      mediaStreamSourceRef.current.disconnect();
      mediaStreamSourceRef.current = null;
    }
    // Release the microphone so the browser's recording indicator turns off.
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(track => track.stop());
      mediaStreamRef.current = null;
    }
    // Stop any queued playback before closing the context it belongs to.
    audioSourcesRef.current.forEach(source => source.stop());
    audioSourcesRef.current.clear();
    if (inputAudioContextRef.current && inputAudioContextRef.current.state !== 'closed') {
      inputAudioContextRef.current.close();
    }
    if (outputAudioContextRef.current && outputAudioContextRef.current.state !== 'closed') {
      outputAudioContextRef.current.close();
    }
    nextStartTimeRef.current = 0;
    setConnectionState('disconnected');
  }, []);
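  // Request the mic, open the Live session, and wire up streaming callbacks.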
  const startConversation = useCallback(async () => {
    setConnectionState('connecting');
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaStreamRef.current = stream;
      // The Live API takes 16 kHz PCM input and returns 24 kHz PCM output,
      // so each direction gets its own AudioContext at the matching rate.
      inputAudioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 16000 });
      outputAudioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 24000 });
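      // The callbacks drive the whole session lifecycle: capture on open,
      // transcribe and play on message, tear down on error or close.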
      const callbacks = {
        onopen: () => {
          setConnectionState('connected');
          const source = inputAudioContextRef.current!.createMediaStreamSource(stream);
          mediaStreamSourceRef.current = source;
          // ScriptProcessorNode is deprecated but remains the simplest way to
          // tap raw PCM frames; an AudioWorklet is the modern replacement.
          const scriptProcessor = inputAudioContextRef.current!.createScriptProcessor(4096, 1, 1);
          scriptProcessorRef.current = scriptProcessor;
          scriptProcessor.onaudioprocess = (audioProcessingEvent) => {
            const inputData = audioProcessingEvent.inputBuffer.getChannelData(0);
            // Convert float samples in [-1, 1] to 16-bit PCM, clamping so
            // +1.0 cannot overflow the Int16 range.
            const pcm = new Int16Array(inputData.length);
            for (let i = 0; i < inputData.length; i++) {
              const s = Math.max(-1, Math.min(1, inputData[i]));
              pcm[i] = s < 0 ? s * 32768 : s * 32767;
            }
            const pcmBlob: Blob = {
              data: encode(new Uint8Array(pcm.buffer)),
              mimeType: 'audio/pcm;rate=16000',
            };
            sessionPromiseRef.current?.then((session) => {
              session.sendRealtimeInput({ media: pcmBlob });
            });
          };
          source.connect(scriptProcessor);
          scriptProcessor.connect(inputAudioContextRef.current!.destination);
        },
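        // onmessage: accumulate both transcriptions and queue returned audio.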
        onmessage: async (message: LiveServerMessage) => {
          if (message.serverContent?.inputTranscription) {
            userTranscriptionRef.current += message.serverContent.inputTranscription.text;
            setUserTranscription(userTranscriptionRef.current);
          }
          if (message.serverContent?.outputTranscription) {
            modelTranscriptionRef.current += message.serverContent.outputTranscription.text;
            setModelTranscription(modelTranscriptionRef.current);
          }
          if (message.serverContent?.turnComplete) {
            // Read from the refs: the state values captured by this closure
            // were frozen when the session was opened and would be stale here.
            setHistory(prev => [...prev, { user: userTranscriptionRef.current, model: modelTranscriptionRef.current }]);
            userTranscriptionRef.current = '';
            modelTranscriptionRef.current = '';
            setUserTranscription('');
            setModelTranscription('');
          }
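          // Schedule model audio chunks back to back for gapless playback.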
          const base64Audio = message.serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
          if (base64Audio && outputAudioContextRef.current) {
            nextStartTimeRef.current = Math.max(nextStartTimeRef.current, outputAudioContextRef.current.currentTime);
            const audioBuffer = await decodeAudioData(decode(base64Audio), outputAudioContextRef.current, 24000, 1);
            const source = outputAudioContextRef.current.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(outputAudioContextRef.current.destination);
            source.addEventListener('ended', () => audioSourcesRef.current.delete(source));
            source.start(nextStartTimeRef.current);
            nextStartTimeRef.current += audioBuffer.duration;
            audioSourcesRef.current.add(source);
          }
        },
        onerror: (e: ErrorEvent) => {
          console.error('Live API Error:', e);
          // Clean up first: cleanup() resets the state to 'disconnected',
          // so 'error' must be set afterwards or the error UI never shows.
          cleanup();
          setConnectionState('error');
        },
        onclose: (e: CloseEvent) => {
          console.log('Live API Closed:', e);
          cleanup();
        },
      };
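      // connectLive resolves once the socket is open; the promise is kept so
      // the audio callback can send frames without racing the connection.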
      sessionPromiseRef.current = connectLive(callbacks);
      sessionRef.current = await sessionPromiseRef.current;
    } catch (error) {
      console.error('Failed to start conversation:', error);
      // Release the mic and contexts acquired before the failure, then
      // surface the error state (cleanup resets it to 'disconnected').
      cleanup();
      setConnectionState('error');
    }
  }, [cleanup]);
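  // Tear everything down if the component unmounts mid-conversation.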
  useEffect(() => {
    return () => cleanup();
  }, [cleanup]);
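  // UI: start/stop controls, completed turns, then any in-flight transcription.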
  return (
    <div className="flex flex-col h-full w-full max-w-4xl mx-auto">
      <h2 className="text-2xl font-bold text-cyan-300 mb-4 text-center">Live Conversation</h2>
      <div className="flex items-center justify-center mb-4">
        {connectionState === 'disconnected' && <button onClick={startConversation} className="bg-green-600 hover:bg-green-500 text-white font-bold py-2 px-4 rounded">Start Conversation</button>}
        {connectionState === 'connecting' && <Spinner text="Connecting..." />}
        {connectionState === 'connected' && <button onClick={cleanup} className="bg-red-600 hover:bg-red-500 text-white font-bold py-2 px-4 rounded">End Conversation</button>}
        {connectionState === 'error' && <p className="text-red-400">Connection error. Please try again.</p>}
      </div>
      <div className="flex-grow overflow-y-auto p-4 bg-gray-800/50 rounded-lg border border-cyan-500/10 h-[50vh]">
        {history.map((turn, index) => (
          <div key={index}>
            <p className="text-cyan-300"><strong>You:</strong> {turn.user}</p>
            <p className="text-purple-300 mb-4"><strong>Codex:</strong> {turn.model}</p>
          </div>
        ))}
        {userTranscription && <p className="text-cyan-300"><strong>You:</strong> {userTranscription}...</p>}
        {modelTranscription && <p className="text-purple-300"><strong>Codex:</strong> {modelTranscription}...</p>}
        {connectionState === 'connected' && history.length === 0 && !userTranscription && <p className="text-gray-400">Listening... Speak into your microphone.</p>}
      </div>
    </div>
  );
};

export default LiveConversationModule;