fix: try pygame for playback

2025-06-30 04:05:29 +02:00 · 2025-06-30 04:05:29 +02:00 · d3ed8d1ee0
commit d3ed8d1ee0
parent 92ebb47849
1 changed file with 41 additions and 77 deletions
--- a/speech_to_text.py
+++ b/speech_to_text.py
@ -13,6 +13,7 @@ import numpy as np
 from typing import Optional, Dict, Any, Callable
 from gtts import gTTS
 import tempfile
 import pygame  # For smoother audio playback
 class SpeechToText:
@ -33,6 +34,13 @@ class SpeechToText:
        self.recording_thread = None
        self.callback = None
        # Initialize pygame for audio playback
        if not pygame.get_init():
            try:
                pygame.init()
            except Exception as e:
                print(f"Warning: Failed to initialize pygame: {e}")
        # Audio settings from config or defaults
        self.format = pyaudio.paInt16
        self.channels = 1
@ -195,8 +203,12 @@ class SpeechToText:
            print(f"Error processing audio: {e}")
    def speak_text(self, text: str) -> None:
-        """Convert text to speech and play it back on the audio device."""
+        """Convert text to speech and play it back using pygame mixer (smoother playback)."""
        try:
            # Initialize pygame mixer if not already done
            if not pygame.get_init():
                pygame.mixer.init(frequency=self.rate, channels=self.channels)
            print("Converting text to speech...")
            # Create a temporary file to store the TTS audio
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
@ -206,98 +218,50 @@ class SpeechToText:
            tts = gTTS(text=text, lang=self.language)
            tts.save(temp_filename)
            # Play the audio file
            print("Playing TTS feedback...")
            # Convert MP3 to WAV with matching sample rate
            wav_file = self._convert_mp3_to_wav(temp_filename)
            if not wav_file:
                print("Failed to convert speech audio for playback")
                return
            # Open the audio file
            wf = wave.open(wav_file, 'rb')
            # Use the same sample rate we know works for the device
            try:
-                output_stream = self.p.open(
+                # Use pygame mixer for smoother playback
-                    format=self.p.get_format_from_width(wf.getsampwidth()),
+                pygame.mixer.music.set_volume(1.0)
-                    channels=wf.getnchannels(),
+                pygame.mixer.music.load(temp_filename)
-                    rate=self.rate,  # Use the known working sample rate
+                pygame.mixer.music.play()
-                    output=True,
+                
-                    output_device_index=self.device_index
+                # Wait for playback to finish
-                )
+                while pygame.mixer.music.get_busy():
                    # Using a short sleep to not consume CPU
                    pygame.time.wait(100)  # Wait 100ms between checks
                print("TTS playback completed")
            except Exception as e:
-                print(f"Failed to open audio output stream: {e}")
+                print(f"Error during pygame playback: {e}")
-                # Try again with device default settings
+                # Fall back to ffplay for playback
-                device_info = self.p.get_device_info_by_index(self.device_index)
+                self._play_with_ffplay(temp_filename)
                output_stream = self.p.open(
                    format=pyaudio.paInt16,
                    channels=self.channels,
                    rate=int(device_info['defaultSampleRate']),
                    output=True,
                    output_device_index=self.device_index
                )
                print(f"Using device default sample rate: {device_info['defaultSampleRate']}")
-            # Play the audio with larger buffer for smoother playback
+            # Remove temporary file
            # Using larger chunk size and adding a small delay to allow the buffer to fill
            chunk_size = 8192  # Increased from 1024 to reduce stuttering
            # Pre-buffer data for smoother playback
            audio_data = []
            while True:
                data = wf.readframes(chunk_size)
                if len(data) == 0:
                    break
                audio_data.append(data)
            # Reset file position for playback
            wf.rewind()
            # Set a larger buffer size for output
            buffer_size = chunk_size * 4
            print(f"Playing audio with buffer size {buffer_size} bytes")
            # Play the buffered data with lower CPU load
            for data in audio_data:
                output_stream.write(data)
                # Small sleep to reduce CPU load and allow buffer to process
                time.sleep(0.005)
            # Clean up resources
            output_stream.stop_stream()
            output_stream.close()
            wf.close()
            # Remove temporary files
            try:
                os.unlink(temp_filename)
                os.unlink(wav_file)  # Also remove the WAV file
            except Exception:
                pass
        except Exception as e:
            print(f"Error generating or playing speech: {e}")
    def _convert_mp3_to_wav(self, mp3_file: str) -> str:
        """Convert MP3 to WAV format with correct sample rate for PyAudio."""
        try:
            import subprocess
            wav_file = mp3_file.replace('.mp3', '.wav')
-            # Use ffmpeg to convert MP3 to WAV with the same sample rate as the recording
+            
-            # This ensures the device can play it back properly
+    def _play_with_ffplay(self, audio_file: str) -> None:
-            subprocess.call(['ffmpeg', '-y', '-i', mp3_file, '-ar', str(self.rate), 
+        """Play audio file using ffplay as a fallback method."""
-                           '-ac', str(self.channels), wav_file], 
+        try:
            print("Trying ffplay fallback playback...")
            import subprocess
            # The -nodisp flag disables the graphical window
            # -autoexit will close ffplay when playback finishes
            subprocess.call(['ffplay', '-nodisp', '-autoexit', audio_file], 
                           stdout=subprocess.DEVNULL, 
                           stderr=subprocess.DEVNULL)
-            print(f"Converted MP3 to WAV with sample rate {self.rate} Hz")
+            print("ffplay playback completed")
            return wav_file
        except Exception as e:
-            print(f"Error converting MP3 to WAV: {e}")
+            print(f"Error using ffplay for playback: {e}")
            return ""
    def cleanup(self) -> None:
        """Clean up resources."""