feat: add speech to text functionality

This commit is contained in:
Jan Häusler 2025-06-30 03:54:39 +02:00
parent ca503eb156
commit e1e43fcfdb

View file

@ -11,6 +11,8 @@ import wave
import pyaudio import pyaudio
import numpy as np import numpy as np
from typing import Optional, Dict, Any, Callable from typing import Optional, Dict, Any, Callable
from gtts import gTTS
import tempfile
class SpeechToText: class SpeechToText:
@ -179,6 +181,10 @@ class SpeechToText:
print(f"Recognized: {text}") print(f"Recognized: {text}")
# Play back the recognized text via TTS
if text:
threading.Thread(target=self.speak_text, args=(text,)).start()
# Call callback with result if provided # Call callback with result if provided
if self.callback and text: if self.callback and text:
self.callback(text) self.callback(text)
@ -188,6 +194,70 @@ class SpeechToText:
except Exception as e: except Exception as e:
print(f"Error processing audio: {e}") print(f"Error processing audio: {e}")
def speak_text(self, text: str) -> None:
    """Convert text to speech and play it back on the audio device.

    The text is synthesised to a temporary MP3 with gTTS, converted to WAV
    through ``self._convert_mp3_to_wav`` and streamed to an output stream
    opened on ``self.p``. All failures are reported with ``print`` and never
    propagate to the caller, because this method runs on a background thread.

    Args:
        text: The text to synthesise and play.
    """
    mp3_path = ""
    wav_path = ""
    try:
        print("Converting text to speech...")
        # Only reserve a unique file name here; gTTS writes the content
        # after the handle is closed, hence delete=False.
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
            mp3_path = temp_file.name

        # NOTE(review): self.language is passed to gTTS unchanged - confirm
        # it holds a language code that gTTS accepts.
        tts = gTTS(text=text, lang=self.language)
        tts.save(mp3_path)

        print("Playing TTS feedback...")
        wav_path = self._convert_mp3_to_wav(mp3_path)
        if not wav_path:
            # The converter signals failure with an empty string.
            raise RuntimeError("MP3 to WAV conversion failed")

        with wave.open(wav_path, 'rb') as wf:
            # NOTE(review): self.device_index is reused as the *output*
            # device - confirm it is not the microphone's input index.
            output_stream = self.p.open(
                format=self.p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                output_device_index=self.device_index
            )
            try:
                chunk_size = 1024
                data = wf.readframes(chunk_size)
                while data:
                    output_stream.write(data)
                    data = wf.readframes(chunk_size)
            finally:
                # Release the stream even when playback fails part-way.
                try:
                    output_stream.stop_stream()
                finally:
                    output_stream.close()
    except Exception as e:
        # Deliberate top-level boundary of the playback thread.
        print(f"Error generating or playing speech: {e}")
    finally:
        # Remove both temporary files (MP3 and converted WAV), on success
        # and on failure alike.
        for path in (mp3_path, wav_path):
            if not path:
                continue
            try:
                os.unlink(path)
            except OSError:
                # Best effort: the file may never have been created.
                pass
def _convert_mp3_to_wav(self, mp3_file: str) -> str:
"""Convert MP3 to WAV format for compatibility with PyAudio."""
try:
import subprocess
wav_file = mp3_file.replace('.mp3', '.wav')
# Use ffmpeg to convert MP3 to WAV
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
return wav_file
except Exception as e:
print(f"Error converting MP3 to WAV: {e}")
return ""
def cleanup(self) -> None: def cleanup(self) -> None:
"""Clean up resources.""" """Clean up resources."""
if self.recording: if self.recording: