diff --git a/speech_to_text.py b/speech_to_text.py index f93ec3f..1b08158 100644 --- a/speech_to_text.py +++ b/speech_to_text.py @@ -209,17 +209,36 @@ class SpeechToText: # Play the audio file print("Playing TTS feedback...") - # Open the audio file and play it - wf = wave.open(self._convert_mp3_to_wav(temp_filename), 'rb') + # Convert MP3 to WAV with matching sample rate + wav_file = self._convert_mp3_to_wav(temp_filename) + if not wav_file: + print("Failed to convert speech audio for playback") + return + + # Open the audio file + wf = wave.open(wav_file, 'rb') - # Open a stream for playback - output_stream = self.p.open( - format=self.p.get_format_from_width(wf.getsampwidth()), - channels=wf.getnchannels(), - rate=wf.getframerate(), - output=True, - output_device_index=self.device_index - ) + # Use the same sample rate we know works for the device + try: + output_stream = self.p.open( + format=self.p.get_format_from_width(wf.getsampwidth()), + channels=wf.getnchannels(), + rate=self.rate, # Use the known working sample rate + output=True, + output_device_index=self.device_index + ) + except Exception as e: + print(f"Failed to open audio output stream: {e}") + # Try again with device default settings + device_info = self.p.get_device_info_by_index(self.device_index) + output_stream = self.p.open( + format=pyaudio.paInt16, + channels=self.channels, + rate=int(device_info['defaultSampleRate']), + output=True, + output_device_index=self.device_index + ) + print(f"Using device default sample rate: {device_info['defaultSampleRate']}") # Play the audio chunk_size = 1024 @@ -237,6 +256,7 @@ class SpeechToText: # Remove temporary files try: os.unlink(temp_filename) + os.unlink(wav_file) # Also remove the WAV file except Exception: pass @@ -244,15 +264,18 @@ class SpeechToText: print(f"Error generating or playing speech: {e}") def _convert_mp3_to_wav(self, mp3_file: str) -> str: - """Convert MP3 to WAV format for compatibility with PyAudio.""" + """Convert MP3 to WAV format with correct sample rate for PyAudio.""" try: import subprocess wav_file = mp3_file.replace('.mp3', '.wav') - # Use ffmpeg to convert MP3 to WAV - subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file], + # Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording + # This ensures the device can play it back properly + subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate), + '-ac', str(self.channels), wav_file], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"Converted MP3 to WAV with sample rate {self.rate} Hz") return wav_file except Exception as e: print(f"Error converting MP3 to WAV: {e}")