fix: resample audio before playing

This commit is contained in:
Jan Häusler 2025-06-30 03:59:00 +02:00
parent e1e43fcfdb
commit d37757d764

View file

@@ -209,17 +209,36 @@ class SpeechToText:
# Play the audio file # Play the audio file
print("Playing TTS feedback...") print("Playing TTS feedback...")
# Open the audio file and play it # Convert MP3 to WAV with matching sample rate
wf = wave.open(self._convert_mp3_to_wav(temp_filename), 'rb') wav_file = self._convert_mp3_to_wav(temp_filename)
if not wav_file:
print("Failed to convert speech audio for playback")
return
# Open a stream for playback # Open the audio file
wf = wave.open(wav_file, 'rb')
# Use the same sample rate we know works for the device
try:
output_stream = self.p.open( output_stream = self.p.open(
format=self.p.get_format_from_width(wf.getsampwidth()), format=self.p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(), channels=wf.getnchannels(),
rate=wf.getframerate(), rate=self.rate, # Use the known working sample rate
output=True, output=True,
output_device_index=self.device_index output_device_index=self.device_index
) )
except Exception as e:
print(f"Failed to open audio output stream: {e}")
# Try again with device default settings
device_info = self.p.get_device_info_by_index(self.device_index)
output_stream = self.p.open(
format=pyaudio.paInt16,
channels=self.channels,
rate=int(device_info['defaultSampleRate']),
output=True,
output_device_index=self.device_index
)
print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
# Play the audio # Play the audio
chunk_size = 1024 chunk_size = 1024
@@ -237,6 +256,7 @@ class SpeechToText:
# Remove temporary files # Remove temporary files
try: try:
os.unlink(temp_filename) os.unlink(temp_filename)
os.unlink(wav_file) # Also remove the WAV file
except Exception: except Exception:
pass pass
@@ -244,15 +264,18 @@ class SpeechToText:
print(f"Error generating or playing speech: {e}") print(f"Error generating or playing speech: {e}")
def _convert_mp3_to_wav(self, mp3_file: str) -> str: def _convert_mp3_to_wav(self, mp3_file: str) -> str:
"""Convert MP3 to WAV format for compatibility with PyAudio.""" """Convert MP3 to WAV format with correct sample rate for PyAudio."""
try: try:
import subprocess import subprocess
wav_file = mp3_file.replace('.mp3', '.wav') wav_file = mp3_file.replace('.mp3', '.wav')
# Use ffmpeg to convert MP3 to WAV # Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file], # This ensures the device can play it back properly
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate),
'-ac', str(self.channels), wav_file],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL) stderr=subprocess.DEVNULL)
print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
return wav_file return wav_file
except Exception as e: except Exception as e:
print(f"Error converting MP3 to WAV: {e}") print(f"Error converting MP3 to WAV: {e}")