Gary Simmons
add transcribe_audio_file function to handle audio file paths for transcription
e1e3a21
| """ | |
| Audio transcription tools for the Agents Course Final Assignment | |
| This module provides tools for transcribing audio files to text. | |
| """ | |
| from smolagents import SpeechToTextTool, tool | |
| from pathlib import Path | |
def transcribe_audio(audio_bytes: bytes) -> str:
    """
    Transcribe an in-memory audio file and return the recognized text.

    Args:
        audio_bytes: Raw bytes of the audio file to transcribe. Can be the full
            contents of a WAV/MP3/OGG file or other common audio container.

    Returns:
        str: The transcribed text from the audio file.

    Raises:
        Exception: If the input is empty or transcription fails due to invalid
            audio format or other errors. The underlying error is attached as
            ``__cause__`` so the root cause stays visible in tracebacks.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    try:
        if not audio_bytes:
            raise ValueError("audio_bytes is empty")

        # NOTE(review): smolagents tools are run by calling the instance, and
        # the speech-to-text tool documents its input as a local path, a URL,
        # or a tensor -- not raw bytes. The bytes are therefore spooled to a
        # temporary file whose path is handed to the tool. Confirm against the
        # installed smolagents version.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        try:
            speech_tool = SpeechToTextTool()
            return speech_tool(tmp_path)
        finally:
            # delete=False is used so the file can be reopened by name on
            # every platform; clean it up ourselves whatever the outcome.
            os.unlink(tmp_path)
    except Exception as e:
        raise Exception(f"Failed to transcribe audio: {e}") from e
def transcribe_audio_file(file_path: str) -> str:
    """
    Given an audio file path, return the transcription (text).

    This tool accepts a file path string and handles accessing the file
    internally, making it suitable for use when the agent cannot directly
    call open().

    Args:
        file_path: Path to the audio file to transcribe. Can be MP3, WAV, OGG, or
            other common audio formats. Must point to an existing regular file
            on the filesystem.

    Returns:
        str: The transcribed text from the audio file.

    Raises:
        Exception: If the path does not point to a regular file or the
            transcription fails. The underlying error is attached as
            ``__cause__``.
    """
    try:
        path = Path(file_path)
        # is_file() rather than exists(): a directory (or an empty string,
        # which resolves to ".") is rejected here with a clear message instead
        # of failing later with an opaque I/O error.
        if not path.is_file():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        # NOTE(review): smolagents tools are run by calling the instance, and
        # the speech-to-text tool documents a local path as a valid input, so
        # the path is passed straight through instead of loading the whole
        # file into memory. Confirm against the installed smolagents version.
        speech_tool = SpeechToTextTool()
        return speech_tool(str(path))
    except FileNotFoundError as e:
        raise Exception(f"File not found: {e}") from e
    except Exception as e:
        raise Exception(f"Failed to transcribe audio file '{file_path}': {e}") from e