fix: try pygame for playback

This commit is contained in:
Jan Häusler 2025-06-30 04:05:29 +02:00
parent 92ebb47849
commit d3ed8d1ee0

View file

@@ -13,6 +13,7 @@ import numpy as np
from typing import Optional, Dict, Any, Callable from typing import Optional, Dict, Any, Callable
from gtts import gTTS from gtts import gTTS
import tempfile import tempfile
import pygame # For smoother audio playback
class SpeechToText: class SpeechToText:
@@ -33,6 +34,13 @@ class SpeechToText:
self.recording_thread = None self.recording_thread = None
self.callback = None self.callback = None
# Initialize pygame for audio playback
if not pygame.get_init():
try:
pygame.init()
except Exception as e:
print(f"Warning: Failed to initialize pygame: {e}")
# Audio settings from config or defaults # Audio settings from config or defaults
self.format = pyaudio.paInt16 self.format = pyaudio.paInt16
self.channels = 1 self.channels = 1
@@ -195,8 +203,12 @@ class SpeechToText:
print(f"Error processing audio: {e}") print(f"Error processing audio: {e}")
def speak_text(self, text: str) -> None: def speak_text(self, text: str) -> None:
"""Convert text to speech and play it back on the audio device.""" """Convert text to speech and play it back using pygame mixer (smoother playback)."""
try: try:
# Initialize pygame mixer if not already done
if not pygame.get_init():
pygame.mixer.init(frequency=self.rate, channels=self.channels)
print("Converting text to speech...") print("Converting text to speech...")
# Create a temporary file to store the TTS audio # Create a temporary file to store the TTS audio
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file: with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
@@ -206,98 +218,50 @@ class SpeechToText:
tts = gTTS(text=text, lang=self.language) tts = gTTS(text=text, lang=self.language)
tts.save(temp_filename) tts.save(temp_filename)
# Play the audio file
print("Playing TTS feedback...") print("Playing TTS feedback...")
# Convert MP3 to WAV with matching sample rate
wav_file = self._convert_mp3_to_wav(temp_filename)
if not wav_file:
print("Failed to convert speech audio for playback")
return
# Open the audio file
wf = wave.open(wav_file, 'rb')
# Use the same sample rate we know works for the device
try: try:
output_stream = self.p.open( # Use pygame mixer for smoother playback
format=self.p.get_format_from_width(wf.getsampwidth()), pygame.mixer.music.set_volume(1.0)
channels=wf.getnchannels(), pygame.mixer.music.load(temp_filename)
rate=self.rate, # Use the known working sample rate pygame.mixer.music.play()
output=True,
output_device_index=self.device_index # Wait for playback to finish
) while pygame.mixer.music.get_busy():
# Using a short sleep to not consume CPU
pygame.time.wait(100) # Wait 100ms between checks
print("TTS playback completed")
except Exception as e: except Exception as e:
print(f"Failed to open audio output stream: {e}") print(f"Error during pygame playback: {e}")
# Try again with device default settings # Fall back to ffplay for playback
device_info = self.p.get_device_info_by_index(self.device_index) self._play_with_ffplay(temp_filename)
output_stream = self.p.open(
format=pyaudio.paInt16,
channels=self.channels,
rate=int(device_info['defaultSampleRate']),
output=True,
output_device_index=self.device_index
)
print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
# Play the audio with larger buffer for smoother playback # Remove temporary file
# Using larger chunk size and adding a small delay to allow the buffer to fill
chunk_size = 8192 # Increased from 1024 to reduce stuttering
# Pre-buffer data for smoother playback
audio_data = []
while True:
data = wf.readframes(chunk_size)
if len(data) == 0:
break
audio_data.append(data)
# Reset file position for playback
wf.rewind()
# Set a larger buffer size for output
buffer_size = chunk_size * 4
print(f"Playing audio with buffer size {buffer_size} bytes")
# Play the buffered data with lower CPU load
for data in audio_data:
output_stream.write(data)
# Small sleep to reduce CPU load and allow buffer to process
time.sleep(0.005)
# Clean up resources
output_stream.stop_stream()
output_stream.close()
wf.close()
# Remove temporary files
try: try:
os.unlink(temp_filename) os.unlink(temp_filename)
os.unlink(wav_file) # Also remove the WAV file
except Exception: except Exception:
pass pass
except Exception as e: except Exception as e:
print(f"Error generating or playing speech: {e}") print(f"Error generating or playing speech: {e}")
def _convert_mp3_to_wav(self, mp3_file: str) -> str:
"""Convert MP3 to WAV format with correct sample rate for PyAudio."""
try:
import subprocess
wav_file = mp3_file.replace('.mp3', '.wav')
# Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
# This ensures the device can play it back properly def _play_with_ffplay(self, audio_file: str) -> None:
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate), """Play audio file using ffplay as a fallback method."""
'-ac', str(self.channels), wav_file], try:
print("Trying ffplay fallback playback...")
import subprocess
# The -nodisp flag disables the graphical window
# -autoexit will close ffplay when playback finishes
subprocess.call(['ffplay', '-nodisp', '-autoexit', audio_file],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL) stderr=subprocess.DEVNULL)
print(f"Converted MP3 to WAV with sample rate {self.rate} Hz") print("ffplay playback completed")
return wav_file
except Exception as e: except Exception as e:
print(f"Error converting MP3 to WAV: {e}") print(f"Error using ffplay for playback: {e}")
return ""
def cleanup(self) -> None: def cleanup(self) -> None:
"""Clean up resources.""" """Clean up resources."""