fix: resample audio before playing

2025-06-30 03:59:00 +02:00 · 2025-06-30 03:59:00 +02:00 · d37757d764
commit d37757d764
parent e1e43fcfdb
1 changed file with 36 additions and 13 deletions
--- a/speech_to_text.py
+++ b/speech_to_text.py
@ -209,17 +209,36 @@ class SpeechToText:
            # Play the audio file
            print("Playing TTS feedback...")
            
-            # Open the audio file and play it
-            wf = wave.open(self._convert_mp3_to_wav(temp_filename), 'rb')
+            # Convert MP3 to WAV with matching sample rate
+            wav_file = self._convert_mp3_to_wav(temp_filename)
+            if not wav_file:
+                print("Failed to convert speech audio for playback")
+                return
                
-            # Open a stream for playback
-            output_stream = self.p.open(
-                format=self.p.get_format_from_width(wf.getsampwidth()),
-                channels=wf.getnchannels(),
-                rate=wf.getframerate(),
-                output=True,
-                output_device_index=self.device_index
-            )
+            # Open the audio file
+            wf = wave.open(wav_file, 'rb')
+            
+            # Use the same sample rate we know works for the device
+            try:
+                output_stream = self.p.open(
+                    format=self.p.get_format_from_width(wf.getsampwidth()),
+                    channels=wf.getnchannels(),
+                    rate=self.rate,  # Use the known working sample rate
+                    output=True,
+                    output_device_index=self.device_index
+                )
+            except Exception as e:
+                print(f"Failed to open audio output stream: {e}")
+                # Try again with device default settings
+                device_info = self.p.get_device_info_by_index(self.device_index)
+                output_stream = self.p.open(
+                    format=pyaudio.paInt16,
+                    channels=self.channels,
+                    rate=int(device_info['defaultSampleRate']),
+                    output=True,
+                    output_device_index=self.device_index
+                )
+                print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
            
            # Play the audio
            chunk_size = 1024
@ -237,6 +256,7 @@ class SpeechToText:
            # Remove temporary files
            try:
                os.unlink(temp_filename)
+                os.unlink(wav_file)  # Also remove the WAV file
            except Exception:
                pass
                
@ -244,15 +264,18 @@ class SpeechToText:
            print(f"Error generating or playing speech: {e}")
            
    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
-        """Convert MP3 to WAV format for compatibility with PyAudio."""
+        """Convert MP3 to WAV format with correct sample rate for PyAudio."""
        try:
            import subprocess
            wav_file = mp3_file.replace('.mp3', '.wav')
            
-            # Use ffmpeg to convert MP3 to WAV
-            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file], 
+            # Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
+            # This ensures the device can play it back properly
+            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate), 
+                           '-ac', str(self.channels), wav_file], 
                           stdout=subprocess.DEVNULL, 
                           stderr=subprocess.DEVNULL)
+            print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
            return wav_file
        except Exception as e:
            print(f"Error converting MP3 to WAV: {e}")