fix: try pygame for playback
This commit is contained in:
parent
92ebb47849
commit
d3ed8d1ee0
1 changed file with 41 additions and 77 deletions
|
|
@ -13,6 +13,7 @@ import numpy as np
|
||||||
from typing import Optional, Dict, Any, Callable
|
from typing import Optional, Dict, Any, Callable
|
||||||
from gtts import gTTS
|
from gtts import gTTS
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import pygame # For smoother audio playback
|
||||||
|
|
||||||
|
|
||||||
class SpeechToText:
|
class SpeechToText:
|
||||||
|
|
@ -33,6 +34,13 @@ class SpeechToText:
|
||||||
self.recording_thread = None
|
self.recording_thread = None
|
||||||
self.callback = None
|
self.callback = None
|
||||||
|
|
||||||
|
# Initialize pygame for audio playback
|
||||||
|
if not pygame.get_init():
|
||||||
|
try:
|
||||||
|
pygame.init()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to initialize pygame: {e}")
|
||||||
|
|
||||||
# Audio settings from config or defaults
|
# Audio settings from config or defaults
|
||||||
self.format = pyaudio.paInt16
|
self.format = pyaudio.paInt16
|
||||||
self.channels = 1
|
self.channels = 1
|
||||||
|
|
@ -195,8 +203,12 @@ class SpeechToText:
|
||||||
print(f"Error processing audio: {e}")
|
print(f"Error processing audio: {e}")
|
||||||
|
|
||||||
def speak_text(self, text: str) -> None:
|
def speak_text(self, text: str) -> None:
|
||||||
"""Convert text to speech and play it back on the audio device."""
|
"""Convert text to speech and play it back using pygame mixer (smoother playback)."""
|
||||||
try:
|
try:
|
||||||
|
# Initialize pygame mixer if not already done
|
||||||
|
if not pygame.get_init():
|
||||||
|
pygame.mixer.init(frequency=self.rate, channels=self.channels)
|
||||||
|
|
||||||
print("Converting text to speech...")
|
print("Converting text to speech...")
|
||||||
# Create a temporary file to store the TTS audio
|
# Create a temporary file to store the TTS audio
|
||||||
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
|
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
|
||||||
|
|
@ -206,98 +218,50 @@ class SpeechToText:
|
||||||
tts = gTTS(text=text, lang=self.language)
|
tts = gTTS(text=text, lang=self.language)
|
||||||
tts.save(temp_filename)
|
tts.save(temp_filename)
|
||||||
|
|
||||||
# Play the audio file
|
|
||||||
print("Playing TTS feedback...")
|
print("Playing TTS feedback...")
|
||||||
|
|
||||||
# Convert MP3 to WAV with matching sample rate
|
|
||||||
wav_file = self._convert_mp3_to_wav(temp_filename)
|
|
||||||
if not wav_file:
|
|
||||||
print("Failed to convert speech audio for playback")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Open the audio file
|
|
||||||
wf = wave.open(wav_file, 'rb')
|
|
||||||
|
|
||||||
# Use the same sample rate we know works for the device
|
|
||||||
try:
|
try:
|
||||||
output_stream = self.p.open(
|
# Use pygame mixer for smoother playback
|
||||||
format=self.p.get_format_from_width(wf.getsampwidth()),
|
pygame.mixer.music.set_volume(1.0)
|
||||||
channels=wf.getnchannels(),
|
pygame.mixer.music.load(temp_filename)
|
||||||
rate=self.rate, # Use the known working sample rate
|
pygame.mixer.music.play()
|
||||||
output=True,
|
|
||||||
output_device_index=self.device_index
|
# Wait for playback to finish
|
||||||
)
|
while pygame.mixer.music.get_busy():
|
||||||
|
# Using a short sleep to not consume CPU
|
||||||
|
pygame.time.wait(100) # Wait 100ms between checks
|
||||||
|
|
||||||
|
print("TTS playback completed")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to open audio output stream: {e}")
|
print(f"Error during pygame playback: {e}")
|
||||||
# Try again with device default settings
|
# Fall back to ffplay for playback
|
||||||
device_info = self.p.get_device_info_by_index(self.device_index)
|
self._play_with_ffplay(temp_filename)
|
||||||
output_stream = self.p.open(
|
|
||||||
format=pyaudio.paInt16,
|
|
||||||
channels=self.channels,
|
|
||||||
rate=int(device_info['defaultSampleRate']),
|
|
||||||
output=True,
|
|
||||||
output_device_index=self.device_index
|
|
||||||
)
|
|
||||||
print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
|
|
||||||
|
|
||||||
# Play the audio with larger buffer for smoother playback
|
# Remove temporary file
|
||||||
# Using larger chunk size and adding a small delay to allow the buffer to fill
|
|
||||||
chunk_size = 8192 # Increased from 1024 to reduce stuttering
|
|
||||||
|
|
||||||
# Pre-buffer data for smoother playback
|
|
||||||
audio_data = []
|
|
||||||
while True:
|
|
||||||
data = wf.readframes(chunk_size)
|
|
||||||
if len(data) == 0:
|
|
||||||
break
|
|
||||||
audio_data.append(data)
|
|
||||||
|
|
||||||
# Reset file position for playback
|
|
||||||
wf.rewind()
|
|
||||||
|
|
||||||
# Set a larger buffer size for output
|
|
||||||
buffer_size = chunk_size * 4
|
|
||||||
|
|
||||||
print(f"Playing audio with buffer size {buffer_size} bytes")
|
|
||||||
|
|
||||||
# Play the buffered data with lower CPU load
|
|
||||||
for data in audio_data:
|
|
||||||
output_stream.write(data)
|
|
||||||
# Small sleep to reduce CPU load and allow buffer to process
|
|
||||||
time.sleep(0.005)
|
|
||||||
|
|
||||||
# Clean up resources
|
|
||||||
output_stream.stop_stream()
|
|
||||||
output_stream.close()
|
|
||||||
wf.close()
|
|
||||||
|
|
||||||
# Remove temporary files
|
|
||||||
try:
|
try:
|
||||||
os.unlink(temp_filename)
|
os.unlink(temp_filename)
|
||||||
os.unlink(wav_file) # Also remove the WAV file
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error generating or playing speech: {e}")
|
print(f"Error generating or playing speech: {e}")
|
||||||
|
|
||||||
def _convert_mp3_to_wav(self, mp3_file: str) -> str:
|
|
||||||
"""Convert MP3 to WAV format with correct sample rate for PyAudio."""
|
|
||||||
try:
|
|
||||||
import subprocess
|
|
||||||
wav_file = mp3_file.replace('.mp3', '.wav')
|
|
||||||
|
|
||||||
# Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
|
|
||||||
# This ensures the device can play it back properly
|
def _play_with_ffplay(self, audio_file: str) -> None:
|
||||||
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate),
|
"""Play audio file using ffplay as a fallback method."""
|
||||||
'-ac', str(self.channels), wav_file],
|
try:
|
||||||
|
print("Trying ffplay fallback playback...")
|
||||||
|
import subprocess
|
||||||
|
# The -nodisp flag disables the graphical window
|
||||||
|
# -autoexit will close ffplay when playback finishes
|
||||||
|
subprocess.call(['ffplay', '-nodisp', '-autoexit', audio_file],
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL)
|
stderr=subprocess.DEVNULL)
|
||||||
print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
|
print("ffplay playback completed")
|
||||||
return wav_file
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error converting MP3 to WAV: {e}")
|
print(f"Error using ffplay for playback: {e}")
|
||||||
return ""
|
|
||||||
|
|
||||||
def cleanup(self) -> None:
|
def cleanup(self) -> None:
|
||||||
"""Clean up resources."""
|
"""Clean up resources."""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue