Agents_Course_Final_Assignment

Sleeping

Agents_Course_Final_Assignment / libs /transcription /transcription_tools.py

Gary Simmons

add transcribe_audio_file function to handle audio file paths for transcription

e1e3a21 4 days ago

2.29 kB

	"""
	Audio transcription tools for the Agents Course Final Assignment

	This module provides tools for transcribing audio files to text.
	"""

	from smolagents import SpeechToTextTool, tool
	from pathlib import Path


	@tool
	def transcribe_audio(audio_bytes: bytes) -> str:
	    """
	    Given an audio file (bytes), return the transcription (text).

	    Args:
	        audio_bytes: Raw bytes of the audio file to transcribe. Can be the full contents
	            of a WAV/MP3/OGG file or other common audio container.

	    Returns:
	        str: The transcribed text from the audio file.

	    Raises:
	        Exception: If transcription fails due to invalid audio format or other errors.
	    """
	    # Imported locally so the tool body does not depend on module-level imports.
	    import os
	    import tempfile

	    try:
	        # Fail fast on empty input instead of handing an empty file to the model.
	        if not audio_bytes:
	            raise ValueError("no audio data provided")

	        # NOTE(review): smolagents tools are documented as callables
	        # (`tool(inputs)`), and `SpeechToTextTool` documents its audio input as
	        # a local path, URL, or tensor; no `transcribe` method appears in that
	        # API. The bytes are therefore spooled to a temporary file and the
	        # path is passed to the tool. Confirm against the installed
	        # smolagents version.
	        with tempfile.NamedTemporaryFile(delete=False) as tmp:
	            tmp.write(audio_bytes)
	            tmp_path = tmp.name
	        try:
	            speech_tool = SpeechToTextTool()
	            return speech_tool(tmp_path)
	        finally:
	            # Always remove the temporary file, even when transcription fails.
	            os.unlink(tmp_path)
	    except Exception as e:
	        # Same exception type and message prefix as before; `from e` keeps the
	        # root cause attached for debugging.
	        raise Exception(f"Failed to transcribe audio: {e}") from e


	@tool
	def transcribe_audio_file(file_path: str) -> str:
	    """
	    Given an audio file path, read the file and return the transcription (text).

	    This tool accepts a file path string and handles reading the file internally,
	    making it suitable for use when the agent cannot directly call open().

	    Args:
	        file_path: Path to the audio file to transcribe. Can be MP3, WAV, OGG, or other
	            common audio formats. Must be a valid file path accessible on the filesystem.

	    Returns:
	        str: The transcribed text from the audio file.

	    Raises:
	        Exception: If the file cannot be read or transcription fails.
	    """
	    try:
	        path = Path(file_path)
	        # `is_file()` also rejects directories, which `exists()` would let
	        # through only to fail later with a less helpful error.
	        if not path.is_file():
	            raise FileNotFoundError(f"Audio file not found: {file_path}")

	        # NOTE(review): smolagents tools are documented as callables
	        # (`tool(inputs)`), and `SpeechToTextTool` documents its audio input as
	        # a local path, URL, or tensor; no `transcribe` method appears in that
	        # API. Passing the path lets the tool load the audio itself, so the
	        # file is not read into memory here. Confirm against the installed
	        # smolagents version.
	        speech_tool = SpeechToTextTool()
	        return speech_tool(str(path))
	    except FileNotFoundError as e:
	        # Missing-file errors keep their dedicated message; `from e` preserves
	        # the original exception as the cause.
	        raise Exception(f"File not found: {e}") from e
	    except Exception as e:
	        raise Exception(f"Failed to transcribe audio file '{file_path}': {e}") from e