feat: add speech to text functionality

2025-06-30 03:54:39 +02:00 · 2025-06-30 03:54:39 +02:00 · e1e43fcfdb
commit e1e43fcfdb
parent ca503eb156
1 changed files with 70 additions and 0 deletions
--- a/speech_to_text.py
+++ b/speech_to_text.py
@ -11,6 +11,8 @@ import wave
 import pyaudio
 import numpy as np
 from typing import Optional, Dict, Any, Callable
 from gtts import gTTS
 import tempfile
 class SpeechToText:
@ -179,6 +181,10 @@ class SpeechToText:
                print(f"Recognized: {text}")
                # Play back the recognized text via TTS
                if text:
                    threading.Thread(target=self.speak_text, args=(text,)).start()
                # Call callback with result if provided
                if self.callback and text:
                    self.callback(text)
@ -188,6 +194,70 @@ class SpeechToText:
        except Exception as e:
            print(f"Error processing audio: {e}")
    def speak_text(self, text: str) -> None:
        """Convert text to speech and play it back on the audio device.

        The text is synthesized to a temporary MP3 with gTTS, converted to WAV
        through ``self._convert_mp3_to_wav``, and streamed to the PyAudio
        output device ``self.device_index`` using ``self.p``.

        Failures are reported on stdout and never raised. Both temporary files
        are removed whether or not synthesis and playback succeed.

        Args:
            text: The text to speak, synthesized in ``self.language``.
        """
        mp3_path = ""
        wav_path = ""
        chunk_size = 1024
        try:
            print("Converting text to speech...")
            # Reserve a unique file name; gTTS writes to it after it is closed.
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
                mp3_path = temp_file.name
            gTTS(text=text, lang=self.language).save(mp3_path)

            print("Playing TTS feedback...")
            wav_path = self._convert_mp3_to_wav(mp3_path)
            if not wav_path:
                # The helper signals failure with an empty string.
                raise RuntimeError("MP3 to WAV conversion failed")

            with wave.open(wav_path, 'rb') as wf:
                output_stream = self.p.open(
                    format=self.p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True,
                    output_device_index=self.device_index,
                )
                try:
                    # readframes() returns b'' once the file is exhausted.
                    for data in iter(lambda: wf.readframes(chunk_size), b''):
                        output_stream.write(data)
                    output_stream.stop_stream()
                finally:
                    # Release the device even when playback fails midway.
                    output_stream.close()
        except Exception as e:
            print(f"Error generating or playing speech: {e}")
        finally:
            # Best-effort removal of both temporary files (MP3 and WAV).
            for path in (mp3_path, wav_path):
                if not path:
                    continue
                try:
                    os.unlink(path)
                except OSError:
                    # Already gone or not removable; nothing useful to do.
                    pass
    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
        """Convert MP3 to WAV format for compatibility with PyAudio."""
        try:
            import subprocess
            wav_file = mp3_file.replace('.mp3', '.wav')
            # Use ffmpeg to convert MP3 to WAV
            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file], 
                           stdout=subprocess.DEVNULL, 
                           stderr=subprocess.DEVNULL)
            return wav_file
        except Exception as e:
            print(f"Error converting MP3 to WAV: {e}")
            return ""
    def cleanup(self) -> None:
        """Clean up resources."""
        if self.recording: