diff --git a/.gitignore b/.gitignore index f02a29f2..2baf34b7 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ extensions.external.json /data/models/ap_bwe/weights/ /data/models/openvoice/ /data/models/openvoice_v2/ +/data/models/kimi-audio/ # Ignore temporary files temp/ diff --git a/README.md b/README.md index e43c3e1b..be4d2482 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,13 @@ ## Changelog +May 6: +* Add Kimi Audio 7B Instruct extension +* Fix React-Gradio file proxy missing slash +* Add Kokoro TTS API extension + +## April 2025 + Apr 25: * Add OpenVoice V2 extension diff --git a/extensions.json b/extensions.json index bf918e59..02f670c4 100644 --- a/extensions.json +++ b/extensions.json @@ -597,6 +597,36 @@ "website": "https://github.com/myshell-ai/OpenVoice", "extension_website": "https://github.com/rsxdalv/extension_openvoice_v2", "extension_platform_version": "0.0.1" + }, + { + "package_name": "extension_kimi_audio", + "name": "Kimi Audio", + "version": "0.0.1", + "requirements": "git+https://github.com/rsxdalv/extension_kimi_audio@main", + "description": "Kimi Audio is a powerful text-to-speech and speech-to-text model by Moonshot AI", + "extension_type": "interface", + "extension_class": "tools", + "author": "Moonshot AI", + "extension_author": "rsxdalv", + "license": "MIT", + "website": "https://github.com/moonshotai/Kimi-Audio", + "extension_website": "https://github.com/rsxdalv/extension_kimi_audio", + "extension_platform_version": "0.0.1" + }, + { + "package_name": "extension_kokoro_tts_api", + "name": "Kokoro TTS API", + "version": "0.0.1", + "requirements": "git+https://github.com/rsxdalv/extension_kokoro_tts_api@main", + "description": "Kokoro TTS API is a text-to-speech model by hexgrad", + "extension_type": "interface", + "extension_class": "tools", + "author": "hexgrad", + "extension_author": "rsxdalv", + "license": "MIT", + "website": "https://huggingface.co/hexgrad/Kokoro-82M", + "extension_website": "https://github.com/rsxdalv/extension_kokoro_tts_api", + "extension_platform_version": "0.0.1" } ], "decorators": [ diff --git a/requirements.txt b/requirements.txt index ae1ff181..54983da0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,8 +15,13 @@ fairseq @ https://github.com/rsxdalv/fairseq/releases/download/v0.12.3/fairseq-0 fairseq @ https://github.com/rsxdalv/fairseq/releases/download/v0.12.3/fairseq-0.12.13-cp310-cp310-macosx_11_0_universal2.whl ; sys_platform == 'darwin' # MIT License accelerate>=0.33.0 # optimum-quanto==0.2.6 + +# experimental # triton>=3.2.0 ; sys_platform == 'linux', # triton-windows>=3.2.0.post18 ; sys_platform == 'win32', +# flash-attn @ https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp310-cp310-win_amd64.whl ; sys_platform == 'win32' +# flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; sys_platform == 'linux' + extension_bark @ git+https://github.com/rsxdalv/extension_bark@main extension_tortoise @ git+https://github.com/rsxdalv/extension_tortoise@main diff --git a/server.py b/server.py index 11338f08..289338fd 100644 --- a/server.py +++ b/server.py @@ -257,7 +257,7 @@ def signal_handler(signal, frame, postgres_process): "npm start --prefix react-ui", env={ **os.environ, - "GRADIO_BACKEND_AUTOMATIC": f"http://127.0.0.1:{gradio_interface_options['server_port']}", + "GRADIO_BACKEND_AUTOMATIC": f"http://127.0.0.1:{gradio_interface_options['server_port']}/", }, shell=True, )