fix: resample audio before playing

2025-06-30 03:59:00 +02:00 · 2025-06-30 03:59:00 +02:00 · d37757d764
commit d37757d764
parent e1e43fcfdb
1 changed file with 36 additions and 13 deletions
--- a/speech_to_text.py
+++ b/speech_to_text.py
@@ -209,17 +209,36 @@ class SpeechToText:
            # Play the audio file
            print("Playing TTS feedback...")
-            # Open the audio file and play it
+            # Convert MP3 to WAV with matching sample rate
-            wf = wave.open(self._convert_mp3_to_wav(temp_filename), 'rb')
+            wav_file = self._convert_mp3_to_wav(temp_filename)
            if not wav_file:
                print("Failed to convert speech audio for playback")
                return
-            # Open a stream for playback
+            # Open the audio file
            wf = wave.open(wav_file, 'rb')
            # Use the same sample rate we know works for the device
            try:
                output_stream = self.p.open(
                    format=self.p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
-                rate=wf.getframerate(),
+                    rate=self.rate,  # Use the known working sample rate
                    output=True,
                    output_device_index=self.device_index
                )
            except Exception as e:
                print(f"Failed to open audio output stream: {e}")
                # Try again with device default settings
                device_info = self.p.get_device_info_by_index(self.device_index)
                output_stream = self.p.open(
                    format=pyaudio.paInt16,
                    channels=self.channels,
                    rate=int(device_info['defaultSampleRate']),
                    output=True,
                    output_device_index=self.device_index
                )
                print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
            # Play the audio
            chunk_size = 1024
@@ -237,6 +256,7 @@ class SpeechToText:
            # Remove temporary files
            try:
                os.unlink(temp_filename)
                os.unlink(wav_file)  # Also remove the WAV file
            except Exception:
                pass
@@ -244,15 +264,18 @@ class SpeechToText:
            print(f"Error generating or playing speech: {e}")
    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
-        """Convert MP3 to WAV format for compatibility with PyAudio."""
+        """Convert MP3 to WAV format with correct sample rate for PyAudio."""
        try:
            import subprocess
            wav_file = mp3_file.replace('.mp3', '.wav')
-            # Use ffmpeg to convert MP3 to WAV
+            # Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
-            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file], 
+            # This ensures the device can play it back properly
            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate), 
                           '-ac', str(self.channels), wav_file], 
                           stdout=subprocess.DEVNULL, 
                           stderr=subprocess.DEVNULL)
            print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
            return wav_file
        except Exception as e:
            print(f"Error converting MP3 to WAV: {e}")