From e1e43fcfdbb5e59c8f3640156f988a5ad58e524c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=A4usler?= <jan.haeusler@metropolitan-cities.com>
Date: Mon, 30 Jun 2025 03:54:39 +0200
Subject: [PATCH] feat: add speech to text functionality

---
 speech_to_text.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/speech_to_text.py b/speech_to_text.py
index 923cfae..f93ec3f 100644
--- a/speech_to_text.py
+++ b/speech_to_text.py
@@ -11,6 +11,8 @@ import wave
 import pyaudio
 import numpy as np
 from typing import Optional, Dict, Any, Callable
+from gtts import gTTS
+import tempfile
 
 
 class SpeechToText:
@@ -179,6 +181,10 @@ class SpeechToText:
                 
                 print(f"Recognized: {text}")
                 
+                # Play back the recognized text via TTS
+                if text:
+                    threading.Thread(target=self.speak_text, args=(text,)).start()
+                
                 # Call callback with result if provided
                 if self.callback and text:
                     self.callback(text)
@@ -188,6 +194,70 @@ class SpeechToText:
         except Exception as e:
             print(f"Error processing audio: {e}")
             
+    def speak_text(self, text: str) -> None:
+        """Convert text to speech and play it back on the audio device.
+
+        Synthesizes ``text`` with gTTS into a temporary MP3, converts it to
+        WAV and streams it through ``self.p``. Errors are printed rather than
+        raised, and both temporary files are removed on every exit path.
+        """
+        import os  # local import keeps this method self-contained
+        temp_filename = wav_filename = ""
+        try:
+            print("Converting text to speech...")
+            # Only reserve a unique path here; gTTS writes to it by name
+            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
+                temp_filename = temp_file.name
+            gTTS(text=text, lang=self.language).save(temp_filename)
+
+            print("Playing TTS feedback...")
+            wav_filename = self._convert_mp3_to_wav(temp_filename)
+            if not wav_filename:
+                raise RuntimeError("MP3 to WAV conversion failed")
+            with wave.open(wav_filename, 'rb') as wf:
+                output_stream = self.p.open(
+                    format=self.p.get_format_from_width(wf.getsampwidth()),
+                    channels=wf.getnchannels(),
+                    rate=wf.getframerate(),
+                    output=True,
+                    output_device_index=self.device_index
+                )
+                # close() must run even if playback fails part-way through
+                try:
+                    chunk_size = 1024  # frames per write
+                    data = wf.readframes(chunk_size)
+                    while len(data) > 0:
+                        output_stream.write(data)
+                        data = wf.readframes(chunk_size)
+                    output_stream.stop_stream()
+                finally:
+                    output_stream.close()
+        except Exception as e:
+            print(f"Error generating or playing speech: {e}")
+        finally:
+            # Best-effort removal of both temp files, even after a failure
+            for path in (temp_filename, wav_filename):
+                try:
+                    if path:
+                        os.unlink(path)
+                except OSError:
+                    pass  # already gone or still locked; nothing more to do
+
+    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
+        """Convert an MP3 to WAV via ffmpeg; return the WAV path, or "" on failure."""
+        import os
+        import subprocess
+        try:
+            # splitext swaps only the extension; str.replace would rewrite every ".mp3" in the path
+            wav_file = os.path.splitext(mp3_file)[0] + '.wav'
+            # check=True turns a non-zero ffmpeg exit into an error instead of ignoring it
+            subprocess.run(['ffmpeg', '-y', '-i', mp3_file, wav_file], check=True,
+                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            return wav_file
+        except Exception as e:
+            print(f"Error converting MP3 to WAV: {e}")
+            return ""
+
     def cleanup(self) -> None:
         """Clean up resources."""
         if self.recording: