feat: support language parameter · frostming/tetos@90e2834 · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Commit 90e2834

Browse files
committed
feat: support language parameter
Signed-off-by: Frost Ming <me@frostming.com>
1 parent 6651c57 commit 90e2834

File tree

10 files changed

+624
-55
lines changed

10 files changed

+624
-55
lines changed

.github/workflows/release.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ jobs:
99
release-pypi:
1010
name: release-pypi
1111
runs-on: ubuntu-latest
12+
permissions:
13+
contents: write
1214

1315
steps:
1416
- uses: actions/checkout@v4
@@ -35,6 +37,7 @@ jobs:
3537
tetos azure -o azure.mp3 "Hello world"
3638
tetos edge -o edge.mp3 "Hello world"
3739
tetos volc -o volc.mp3 "Hello world"
40+
tetos google -o google.mp3 "Hello world"
3841
env:
3942
OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
4043
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -43,6 +46,7 @@ jobs:
4346
VOLC_ACCESS_KEY: ${{ secrets.VOLC_ACCESS_KEY }}
4447
VOLC_SECRET_KEY: ${{ secrets.VOLC_SECRET_KEY }}
4548
VOLC_APP_KEY: ${{ secrets.VOLC_APP_KEY }}
49+
GOOGLE_CREDENTIALS_JSON: ${{ secrets.GOOGLE_CREDENTIALS }}
4650

4751
- name: Upload audio files
4852
uses: actions/upload-artifact@v2

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,4 @@ cython_debug/
160160
# and can be added to the global gitignore or merged into this file. For a more nuclear
161161
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162162
#.idea/
163+
*.mp3

src/tetos/azure.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import os
24
from pathlib import Path
35

@@ -15,19 +17,15 @@ class AzureSpeaker(Speaker):
1517
Args:
1618
speech_key (str): The Azure Speech key.
1719
speech_region (str): The Azure Speech region.
18-
voice (str): The voice to use.
20+
voice (str, optional): The voice to use.
1921
"""
2022

2123
def __init__(
22-
self, speech_key: str, speech_region: str, *, voice: str = "en-US-AriaNeural"
24+
self, speech_key: str, speech_region: str, *, voice: str | None = None
2325
) -> None:
24-
self.speech_config = speechsdk.SpeechConfig(
25-
subscription=speech_key, region=speech_region
26-
)
27-
self.speech_config.speech_synthesis_voice_name = voice
28-
self.speech_config.set_speech_synthesis_output_format(
29-
speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
30-
)
26+
self.voice = voice
27+
self.speech_key = speech_key
28+
self.speech_region = speech_region
3129
self._set_proxy()
3230

3331
def _set_proxy(self) -> None:
@@ -47,10 +45,29 @@ def _set_proxy(self) -> None:
4745
)
4846
break
4947

50-
async def synthesize(self, text: str, out_file: Path) -> float:
48+
def get_speech_config(self, lang: str) -> str:
49+
config = speechsdk.SpeechConfig(
50+
subscription=self.speech_key, region=self.speech_region
51+
)
52+
config.set_speech_synthesis_output_format(
53+
speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
54+
)
55+
if self.voice:
56+
voice = self.voice
57+
else:
58+
voice = next(
59+
(v for v in self.list_voices() if v.startswith(lang)),
60+
"en-US-AriaNeural",
61+
)
62+
config.speech_synthesis_voice_name = voice
63+
return config
64+
65+
async def synthesize(
66+
self, text: str, out_file: str | Path, lang: str = "en-US"
67+
) -> float:
5168
audio_config = speechsdk.audio.AudioOutputConfig(filename=str(out_file))
5269
speech_synthesizer = speechsdk.SpeechSynthesizer(
53-
speech_config=self.speech_config, audio_config=audio_config
70+
speech_config=self.get_speech_config(lang), audio_config=audio_config
5471
)
5572
result = await anyio.to_thread.run_sync(speech_synthesizer.speak_text, text)
5673

@@ -84,12 +101,16 @@ def get_command(cls) -> click.Command:
84101
required=True,
85102
help="The Azure Speech region.",
86103
)
87-
@click.option("--voice", default="en-US-AriaNeural", help="The voice to use.")
88104
@common_options(cls)
89105
def azure(
90-
speech_key: str, speech_region: str, voice: str, text: str, output: str
106+
speech_key: str,
107+
speech_region: str,
108+
voice: str | None,
109+
text: str,
110+
lang: str,
111+
output: str,
91112
) -> None:
92113
speaker = cls(speech_key, speech_region, voice=voice)
93-
speaker.say(text, Path(output))
114+
speaker.say(text, output, lang=lang)
94115

95116
return azure

src/tetos/base.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import abc
24
from pathlib import Path
35
from typing import Any, Callable, TypeVar
@@ -13,12 +15,15 @@ class SynthesizeError(RuntimeError):
1315

1416
class Speaker(metaclass=abc.ABCMeta):
1517
@abc.abstractmethod
16-
async def synthesize(self, text: str, out_file: Path) -> float:
18+
async def synthesize(
19+
self, text: str, out_file: str | Path, lang: str = "en-US"
20+
) -> float:
1721
"""Generate speech from text and save it to a file.
1822
1923
Args:
2024
text (str): The text to synthesize.
2125
out_file (Path): The file to save the speech to.
26+
lang (str): The language code of the text. e.g. "en-US", "fr-FR".
2227
2328
Returns:
2429
float: The duration of the speech in seconds.
@@ -45,17 +50,23 @@ def get_command(cls) -> click.Command:
4550
"""
4651
raise NotImplementedError
4752

48-
def say(self, text: str, out_file: Path | None = None) -> float:
49-
"""A synchronous version of synthesize() that takes an optional
50-
playback argument to play the audio.
53+
def say(
54+
self, text: str, out_file: str | Path | None = None, lang: str = "en-US"
55+
) -> float:
56+
"""A synchronous version of synthesize()
57+
58+
Args:
59+
text (str): The text to synthesize.
60+
out_file (Path): The file to save the speech to.
61+
lang (str): The language code of the text. e.g. "en-US", "fr-FR".
5162
"""
5263
import anyio
5364
import click
5465

5566
if out_file is None:
5667
out_file = Path("tts-output.mp3")
5768

58-
result = anyio.run(self.synthesize, text, out_file)
69+
result = anyio.run(self.synthesize, text, out_file, lang)
5970
click.echo(f"Speech is generated successfully at {out_file}")
6071
return result
6172

@@ -76,6 +87,15 @@ def decorator(func: F) -> F:
7687
default="tts-output.mp3",
7788
help="The output file.",
7889
)(func)
90+
func = click.option(
91+
"--voice",
92+
help="The voice to use. See supported voices with `--list-voices`",
93+
)(func)
94+
func = click.option(
95+
"--lang",
96+
default="en-US",
97+
help="The language code of the text. e.g. 'en-US', 'fr-FR'.",
98+
)(func)
7999
func = click.option(
80100
"--list-voices",
81101
"-l",

0 commit comments

Comments
 (0)
0