fix: resample audio before playing
This commit is contained in:
parent
e1e43fcfdb
commit
d37757d764
1 changed file with 36 additions and 13 deletions
|
|
@@ -209,17 +209,36 @@ class SpeechToText:
|
|||
# Play the audio file
|
||||
print("Playing TTS feedback...")
|
||||
|
||||
# Open the audio file and play it
|
||||
wf = wave.open(self._convert_mp3_to_wav(temp_filename), 'rb')
|
||||
# Convert MP3 to WAV with matching sample rate
|
||||
wav_file = self._convert_mp3_to_wav(temp_filename)
|
||||
if not wav_file:
|
||||
print("Failed to convert speech audio for playback")
|
||||
return
|
||||
|
||||
# Open a stream for playback
|
||||
output_stream = self.p.open(
|
||||
format=self.p.get_format_from_width(wf.getsampwidth()),
|
||||
channels=wf.getnchannels(),
|
||||
rate=wf.getframerate(),
|
||||
output=True,
|
||||
output_device_index=self.device_index
|
||||
)
|
||||
# Open the audio file
|
||||
wf = wave.open(wav_file, 'rb')
|
||||
|
||||
# Use the same sample rate we know works for the device
|
||||
try:
|
||||
output_stream = self.p.open(
|
||||
format=self.p.get_format_from_width(wf.getsampwidth()),
|
||||
channels=wf.getnchannels(),
|
||||
rate=self.rate, # Use the known working sample rate
|
||||
output=True,
|
||||
output_device_index=self.device_index
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Failed to open audio output stream: {e}")
|
||||
# Try again with device default settings
|
||||
device_info = self.p.get_device_info_by_index(self.device_index)
|
||||
output_stream = self.p.open(
|
||||
format=pyaudio.paInt16,
|
||||
channels=self.channels,
|
||||
rate=int(device_info['defaultSampleRate']),
|
||||
output=True,
|
||||
output_device_index=self.device_index
|
||||
)
|
||||
print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
|
||||
|
||||
# Play the audio
|
||||
chunk_size = 1024
|
||||
|
|
@@ -237,6 +256,7 @@ class SpeechToText:
|
|||
# Remove temporary files
|
||||
try:
|
||||
os.unlink(temp_filename)
|
||||
os.unlink(wav_file) # Also remove the WAV file
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@@ -244,15 +264,18 @@ class SpeechToText:
|
|||
print(f"Error generating or playing speech: {e}")
|
||||
|
||||
def _convert_mp3_to_wav(self, mp3_file: str) -> str:
|
||||
"""Convert MP3 to WAV format for compatibility with PyAudio."""
|
||||
"""Convert MP3 to WAV format with correct sample rate for PyAudio."""
|
||||
try:
|
||||
import subprocess
|
||||
wav_file = mp3_file.replace('.mp3', '.wav')
|
||||
|
||||
# Use ffmpeg to convert MP3 to WAV
|
||||
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file],
|
||||
# Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
|
||||
# This ensures the device can play it back properly
|
||||
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate),
|
||||
'-ac', str(self.channels), wav_file],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL)
|
||||
print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
|
||||
return wav_file
|
||||
except Exception as e:
|
||||
print(f"Error converting MP3 to WAV: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue