feat: add speech to text functionality
This commit is contained in:
parent
ca503eb156
commit
e1e43fcfdb
1 changed files with 70 additions and 0 deletions
|
|
@ -11,6 +11,8 @@ import wave
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import Optional, Dict, Any, Callable
|
from typing import Optional, Dict, Any, Callable
|
||||||
|
from gtts import gTTS
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
class SpeechToText:
|
class SpeechToText:
|
||||||
|
|
@ -179,6 +181,10 @@ class SpeechToText:
|
||||||
|
|
||||||
print(f"Recognized: {text}")
|
print(f"Recognized: {text}")
|
||||||
|
|
||||||
|
# Play back the recognized text via TTS
|
||||||
|
if text:
|
||||||
|
threading.Thread(target=self.speak_text, args=(text,)).start()
|
||||||
|
|
||||||
# Call callback with result if provided
|
# Call callback with result if provided
|
||||||
if self.callback and text:
|
if self.callback and text:
|
||||||
self.callback(text)
|
self.callback(text)
|
||||||
|
|
@ -188,6 +194,70 @@ class SpeechToText:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing audio: {e}")
|
print(f"Error processing audio: {e}")
|
||||||
|
|
||||||
|
def speak_text(self, text: str) -> None:
    """Convert text to speech and play it back on the audio device.

    The text is synthesized to a temporary MP3 file with gTTS, converted to
    WAV through ``self._convert_mp3_to_wav`` and streamed in 1024-frame
    chunks to a PyAudio output stream opened on ``self.device_index``.

    Errors are reported on stdout and never propagate to the caller. The
    audio handles and both temporary files are released on every path,
    including failures.

    Args:
        text: The text to speak. Empty or whitespace-only text is ignored.
    """
    if not text or not text.strip():
        # Nothing to synthesize; avoid creating temp files for no reason.
        return

    temp_filename = ""
    wav_filename = ""
    wf = None
    output_stream = None
    try:
        print("Converting text to speech...")
        # Only the unique path is needed here: the handle is closed right
        # away so that gTTS (and later ffmpeg) can open the file themselves.
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
            temp_filename = temp_file.name

        # Generate speech using gTTS.
        # NOTE(review): assumes self.language is a language code gTTS
        # accepts -- confirm against where self.language is set.
        tts = gTTS(text=text, lang=self.language)
        tts.save(temp_filename)

        # Play the audio file
        print("Playing TTS feedback...")

        # The wave module cannot read MP3, so convert first.
        wav_filename = self._convert_mp3_to_wav(temp_filename)
        if not wav_filename:
            print("Error generating or playing speech: MP3 to WAV conversion failed")
            return

        wf = wave.open(wav_filename, 'rb')

        # Open a playback stream matching the WAV file's own parameters.
        output_stream = self.p.open(
            format=self.p.get_format_from_width(wf.getsampwidth()),
            channels=wf.getnchannels(),
            rate=wf.getframerate(),
            output=True,
            output_device_index=self.device_index
        )

        # Stream the audio chunk by chunk until the file is exhausted.
        chunk_size = 1024
        data = wf.readframes(chunk_size)
        while len(data) > 0:
            output_stream.write(data)
            data = wf.readframes(chunk_size)

    except Exception as e:
        # Best-effort feature: report the failure but never raise.
        print(f"Error generating or playing speech: {e}")
    finally:
        # Release audio resources even when synthesis or playback failed.
        if output_stream is not None:
            try:
                output_stream.stop_stream()
                output_stream.close()
            except Exception as e:
                print(f"Error generating or playing speech: {e}")
        if wf is not None:
            wf.close()

        # Remove both temporary files (the MP3 and the converted WAV).
        for path in (temp_filename, wav_filename):
            if not path:
                continue
            try:
                os.unlink(path)
            except OSError:
                # The file may never have been created, or is already gone.
                pass
|
||||||
|
|
||||||
|
def _convert_mp3_to_wav(self, mp3_file: str) -> str:
|
||||||
|
"""Convert MP3 to WAV format for compatibility with PyAudio."""
|
||||||
|
try:
|
||||||
|
import subprocess
|
||||||
|
wav_file = mp3_file.replace('.mp3', '.wav')
|
||||||
|
|
||||||
|
# Use ffmpeg to convert MP3 to WAV
|
||||||
|
subprocess.call(['ffmpeg', '-y', '-i', mp3_file, wav_file],
|
||||||
|
stdout=subprocess.DEVNULL,
|
||||||
|
stderr=subprocess.DEVNULL)
|
||||||
|
return wav_file
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error converting MP3 to WAV: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
def cleanup(self) -> None:
|
def cleanup(self) -> None:
|
||||||
"""Clean up resources."""
|
"""Clean up resources."""
|
||||||
if self.recording:
|
if self.recording:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue