From e1e43fcfdbb5e59c8f3640156f988a5ad58e524c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=A4usler?=
Date: Mon, 30 Jun 2025 03:54:39 +0200
Subject: [PATCH] feat: add speech to text functionality

---
Review note: the indentation of the context lines below was reconstructed
from a whitespace-collapsed copy of this patch; if it does not apply
cleanly, retry with `git apply --ignore-whitespace`.

 speech_to_text.py | 103 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/speech_to_text.py b/speech_to_text.py
index 923cfae..f93ec3f 100644
--- a/speech_to_text.py
+++ b/speech_to_text.py
@@ -11,6 +11,8 @@ import wave
 import pyaudio
 import numpy as np
 from typing import Optional, Dict, Any, Callable
+from gtts import gTTS
+import tempfile
 
 
 class SpeechToText:
@@ -179,6 +181,10 @@ class SpeechToText:
 
             print(f"Recognized: {text}")
 
+            # Play back the recognized text via TTS
+            if text:
+                threading.Thread(target=self.speak_text, args=(text,)).start()
+
             # Call callback with result if provided
             if self.callback and text:
                 self.callback(text)
@@ -188,6 +194,103 @@ class SpeechToText:
         except Exception as e:
             print(f"Error processing audio: {e}")
 
+    def speak_text(self, text: str) -> None:
+        """Convert text to speech and play it back on the audio device.
+
+        The text is synthesized to a temporary MP3 with gTTS, converted to
+        WAV by ``_convert_mp3_to_wav`` and streamed through PyAudio. Errors
+        are printed instead of raised, and both temporary files are removed
+        whether or not playback succeeds.
+
+        Args:
+            text: The text to speak.
+        """
+        mp3_path = ""
+        wav_path = ""
+        try:
+            print("Converting text to speech...")
+            # delete=False keeps the file on disk after the handle closes so
+            # gTTS can write to it by name.
+            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
+                mp3_path = temp_file.name
+
+            # NOTE(review): assumes self.language is a code gTTS accepts - confirm.
+            tts = gTTS(text=text, lang=self.language)
+            tts.save(mp3_path)
+
+            # PyAudio plays raw PCM frames, so decode the MP3 to WAV first.
+            wav_path = self._convert_mp3_to_wav(mp3_path)
+            if not wav_path:
+                # The converter already reported the failure.
+                return
+
+            print("Playing TTS feedback...")
+            with wave.open(wav_path, 'rb') as wf:
+                output_stream = self.p.open(
+                    format=self.p.get_format_from_width(wf.getsampwidth()),
+                    channels=wf.getnchannels(),
+                    rate=wf.getframerate(),
+                    output=True,
+                    output_device_index=self.device_index
+                )
+                try:
+                    chunk_size = 1024
+                    data = wf.readframes(chunk_size)
+                    while data:
+                        output_stream.write(data)
+                        data = wf.readframes(chunk_size)
+                finally:
+                    # Release the output stream even if a write fails.
+                    output_stream.stop_stream()
+                    output_stream.close()
+
+        except Exception as e:
+            print(f"Error generating or playing speech: {e}")
+        finally:
+            # Remove the MP3 and the converted WAV; either may be missing.
+            for path in (mp3_path, wav_path):
+                if path:
+                    try:
+                        os.unlink(path)
+                    except OSError:
+                        pass
+
+    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
+        """Convert an MP3 file to WAV with the ffmpeg command-line tool.
+
+        Args:
+            mp3_file: Path of the MP3 file to convert.
+
+        Returns:
+            Path of the WAV file written next to ``mp3_file``, or an empty
+            string if ffmpeg could not be started or exited with an error.
+        """
+        import subprocess
+
+        # Swap only the extension; str.replace('.mp3', '.wav') would also
+        # rewrite a '.mp3' that appears earlier in the path.
+        wav_file = os.path.splitext(mp3_file)[0] + '.wav'
+        try:
+            result = subprocess.run(
+                ['ffmpeg', '-y', '-i', mp3_file, wav_file],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+        except OSError as e:
+            # Raised when the ffmpeg executable is missing or not runnable.
+            print(f"Error converting MP3 to WAV: {e}")
+            return ""
+
+        if result.returncode != 0:
+            print(f"Error converting MP3 to WAV: ffmpeg exited with code {result.returncode}")
+            # Drop any partial output ffmpeg left behind.
+            try:
+                os.unlink(wav_file)
+            except OSError:
+                pass
+            return ""
+        return wav_file
+
     def cleanup(self) -> None:
         """Clean up resources."""
         if self.recording: