Commit 0550e43

Author: kevin.zhang
Merge commit with 2 parents: 1fb3399 + 9ab13a7

14 files changed: +198 −99 lines

.gitignore (+3 −1)

```diff
@@ -9,4 +9,6 @@
 /app/utils/__pycache__/
 /*/__pycache__/*
 .vscode
-/**/.streamlit
+/**/.streamlit
+__pycache__
+logs/
```

Dockerfile (+11 −10)

```diff
@@ -4,7 +4,7 @@ FROM python:3.10-slim
 # Set the working directory in the container
 WORKDIR /MoneyPrinterTurbo
 
-ENV PYTHONPATH="/MoneyPrinterTurbo:$PYTHONPATH"
+ENV PYTHONPATH="/MoneyPrinterTurbo"
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
@@ -17,11 +17,7 @@ RUN apt-get update && apt-get install -y \
 RUN sed -i '/<policy domain="path" rights="none" pattern="@\*"/d' /etc/ImageMagick-6/policy.xml
 
 # Copy the current directory contents into the container at /MoneyPrinterTurbo
-COPY ./app ./app
-COPY ./webui ./webui
-COPY ./resource ./resource
-COPY ./requirements.txt ./requirements.txt
-COPY ./main.py ./main.py
+COPY . .
 
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
@@ -30,8 +26,13 @@ RUN pip install --no-cache-dir -r requirements.txt
 EXPOSE 8501
 
 # Command to run the application
-CMD ["streamlit", "run", "./webui/Main.py","--browser.serverAddress=0.0.0.0","--server.enableCORS=True","--browser.gatherUsageStats=False"]
+CMD ["streamlit", "run", "./webui/Main.py","--browser.serverAddress=127.0.0.1","--server.enableCORS=True","--browser.gatherUsageStats=False"]
 
-# At runtime, mount the config.toml file from the host into the container
-# using Docker volumes. Example usage:
-# docker run -v ./config.toml:/MoneyPrinterTurbo/config.toml -v ./storage:/MoneyPrinterTurbo/storage -p 8501:8501 moneyprinterturbo
+# 1. Build the Docker image using the following command
+# docker build -t moneyprinterturbo .
+
+# 2. Run the Docker container using the following command
+## For Linux or MacOS:
+# docker run -v $(pwd)/config.toml:/MoneyPrinterTurbo/config.toml -v $(pwd)/storage:/MoneyPrinterTurbo/storage -p 8501:8501 moneyprinterturbo
+## For Windows:
+# docker run -v %cd%/config.toml:/MoneyPrinterTurbo/config.toml -v %cd%/storage:/MoneyPrinterTurbo/storage -p 8501:8501 moneyprinterturbo
```

README.md (+33 −10)

````diff
@@ -66,6 +66,9 @@
 - [ ] Support more text-to-speech providers, such as OpenAI TTS and Azure TTS
 - [ ] Automatic upload to YouTube
 
+## Community 💬
+<img src="docs/wechat-01.jpg" width="300">
+
 ## Video Demos 📺
 
 ### Portrait 9:16
@@ -102,8 +105,17 @@
 </tbody>
 </table>
 
+## System Requirements 📦
+- Recommended minimum: a 4-core CPU or better and 8 GB of RAM or more; a GPU is optional
+- Windows 10 or macOS 11.0 and above
+
 ## Installation & Deployment 📥
 
+> If you would rather not deploy it yourself, download the installer package, extract it, and use it directly
+- **Windows** download link
+  - Baidu Netdisk: https://pan.baidu.com/s/1BB3SGtAFTytzFLS5t2d8Gg?pwd=5bry
+
+### Prerequisites
 - Try not to use paths containing **Chinese characters**, to avoid unpredictable problems
 - Make sure your **network** connection is working; a VPN must be switched to `global traffic` mode
 
@@ -230,8 +242,8 @@ python main.py
 
 Two subtitle generation methods are currently supported:
 
-- edge: faster generation and better performance, with no special hardware requirements, but the quality may be unstable
-- whisper: slower generation and worse performance, with some hardware requirements, but more reliable quality
+- **edge**: `faster` generation and better performance, with no special hardware requirements, but the quality may be unstable
+- **whisper**: `slower` generation and worse performance, with some hardware requirements, but `more reliable quality`
 
 You can switch between them by changing `subtitle_provider` in the `config.toml` configuration file
 
@@ -241,6 +253,25 @@ python main.py
 1. In whisper mode, a model file of about 3 GB must be downloaded from HuggingFace; make sure your network is working
 2. If left empty, no subtitles are generated.
 
+> Since HuggingFace is not accessible from mainland China, the `whisper-large-v3` model file can be downloaded via the links below
+
+Download links:
+- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
+- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b
+
+After downloading, extract the model and place the whole directory in `.\MoneyPrinterTurbo\models`,
+so that the final path looks like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`
+```
+MoneyPrinterTurbo
+├─models
+│  └─whisper-large-v3
+│          config.json
+│          model.bin
+│          preprocessor_config.json
+│          tokenizer.json
+│          vocabulary.json
+```
+
 ## Background Music 🎵
 
 Background music for the videos lives in the project's `resource/songs` directory.
@@ -375,14 +406,6 @@ pip install Pillow==8.4.0
 
 - You can open an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues)
 or a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)
-- You can also follow my **Douyin** and **WeChat Channels** account `网旭哈瑞.AI`
-- I post **tutorials** and **technical** content there.
-- Updates and improvements will also be **announced there promptly**
-- You can also **leave questions** there, and I will **reply as soon as possible**
-
-| Douyin | | WeChat Channels |
-|:---------------------------------------:|:------------:|:-------------------------------------------:|
-| <img src="docs/douyin.jpg" width="180"> | | <img src="docs/shipinghao.jpg" width="200"> |
 
 ## Reference Projects 📚
````

app/config/config.py (+32 −31)

```diff
@@ -1,28 +1,45 @@
 import os
 import socket
 import toml
+import shutil
 from loguru import logger
 
 root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
 config_file = f"{root_dir}/config.toml"
-if not os.path.isfile(config_file):
-    example_file = f"{root_dir}/config.example.toml"
-    if os.path.isfile(example_file):
-        import shutil
 
-        shutil.copyfile(example_file, config_file)
-        logger.info(f"copy config.example.toml to config.toml")
 
-logger.info(f"load config from file: {config_file}")
+def load_config():
+    # fix: IsADirectoryError: [Errno 21] Is a directory: '/MoneyPrinterTurbo/config.toml'
+    if os.path.isdir(config_file):
+        shutil.rmtree(config_file)
 
-try:
-    _cfg = toml.load(config_file)
-except Exception as e:
-    logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
-    with open(config_file, mode="r", encoding='utf-8-sig') as fp:
-        _cfg_content = fp.read()
-    _cfg = toml.loads(_cfg_content)
+    if not os.path.isfile(config_file):
+        example_file = f"{root_dir}/config.example.toml"
+        if os.path.isfile(example_file):
+            shutil.copyfile(example_file, config_file)
+            logger.info(f"copy config.example.toml to config.toml")
 
+    logger.info(f"load config from file: {config_file}")
+
+    try:
+        _config_ = toml.load(config_file)
+    except Exception as e:
+        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
+        with open(config_file, mode="r", encoding='utf-8-sig') as fp:
+            _cfg_content = fp.read()
+        _config_ = toml.loads(_cfg_content)
+    return _config_
+
+
+def save_config():
+    with open(config_file, "w", encoding="utf-8") as f:
+        _cfg["app"] = app
+        _cfg["whisper"] = whisper
+        _cfg["pexels"] = pexels
+        f.write(toml.dumps(_cfg))
+
+
+_cfg = load_config()
 app = _cfg.get("app", {})
 whisper = _cfg.get("whisper", {})
 pexels = _cfg.get("pexels", {})
@@ -36,7 +53,7 @@
 project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
 project_description = _cfg.get("project_description",
                                "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
-project_version = _cfg.get("project_version", "1.0.1")
+project_version = _cfg.get("project_version", "1.1.0")
 reload_debug = False
 
 imagemagick_path = app.get("imagemagick_path", "")
@@ -46,19 +63,3 @@
 ffmpeg_path = app.get("ffmpeg_path", "")
 if ffmpeg_path and os.path.isfile(ffmpeg_path):
     os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path
-
-
-# __cfg = {
-#     "hostname": hostname,
-#     "listen_host": listen_host,
-#     "listen_port": listen_port,
-# }
-# logger.info(__cfg)
-
-
-def save_config():
-    with open(config_file, "w", encoding="utf-8") as f:
-        _cfg["app"] = app
-        _cfg["whisper"] = whisper
-        _cfg["pexels"] = pexels
-        f.write(toml.dumps(_cfg))
```

app/services/llm.py (+60 −11)

```diff
@@ -5,6 +5,8 @@
 from loguru import logger
 from openai import OpenAI
 from openai import AzureOpenAI
+from openai.types.chat import ChatCompletion
+
 from app.config import config
 
 
@@ -57,6 +59,11 @@ def _generate_response(prompt: str) -> str:
         api_key = config.app.get("qwen_api_key")
         model_name = config.app.get("qwen_model_name")
         base_url = "***"
+    elif llm_provider == "cloudflare":
+        api_key = config.app.get("cloudflare_api_key")
+        model_name = config.app.get("cloudflare_model_name")
+        account_id = config.app.get("cloudflare_account_id")
+        base_url = "***"
     else:
         raise ValueError("llm_provider is not set, please set it in the config.toml file.")
 
@@ -69,17 +76,31 @@ def _generate_response(prompt: str) -> str:
 
     if llm_provider == "qwen":
         import dashscope
+        from dashscope.api_entities.dashscope_response import GenerationResponse
         dashscope.api_key = api_key
         response = dashscope.Generation.call(
             model=model_name,
             messages=[{"role": "user", "content": prompt}]
         )
-        content = response["output"]["text"]
-        return content.replace("\n", "")
+        if response:
+            if isinstance(response, GenerationResponse):
+                status_code = response.status_code
+                if status_code != 200:
+                    raise Exception(
+                        f"[{llm_provider}] returned an error response: \"{response}\"")
+
+                content = response["output"]["text"]
+                return content.replace("\n", "")
+            else:
+                raise Exception(
+                    f"[{llm_provider}] returned an invalid response: \"{response}\"")
+        else:
+            raise Exception(
+                f"[{llm_provider}] returned an empty response")
 
     if llm_provider == "gemini":
         import google.generativeai as genai
-        genai.configure(api_key=api_key)
+        genai.configure(api_key=api_key, transport='rest')
 
         generation_config = {
             "temperature": 0.5,
@@ -111,10 +132,30 @@ def _generate_response(prompt: str) -> str:
                                       generation_config=generation_config,
                                       safety_settings=safety_settings)
 
-        convo = model.start_chat(history=[])
-
-        convo.send_message(prompt)
-        return convo.last.text
+        try:
+            response = model.generate_content(prompt)
+            candidates = response.candidates
+            generated_text = candidates[0].content.parts[0].text
+        except (AttributeError, IndexError) as e:
+            print("Gemini Error:", e)
+
+        return generated_text
+
+    if llm_provider == "cloudflare":
+        import requests
+        response = requests.post(
+            f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
+            headers={"Authorization": f"Bearer {api_key}"},
+            json={
+                "messages": [
+                    {"role": "system", "content": "You are a friendly assistant"},
+                    {"role": "user", "content": prompt}
+                ]
+            }
+        )
+        result = response.json()
+        logger.info(result)
+        return result["result"]["response"]
 
     if llm_provider == "azure":
         client = AzureOpenAI(
@@ -133,7 +174,15 @@ def _generate_response(prompt: str) -> str:
         messages=[{"role": "user", "content": prompt}]
     )
     if response:
-        content = response.choices[0].message.content
+        if isinstance(response, ChatCompletion):
+            content = response.choices[0].message.content
+        else:
+            raise Exception(
+                f"[{llm_provider}] returned an invalid response: \"{response}\", please check your network "
+                f"connection and try again.")
+    else:
+        raise Exception(
+            f"[{llm_provider}] returned an empty response, please check your network connection and try again.")
 
     return content.replace("\n", "")
 
@@ -149,9 +198,9 @@ def generate_script(video_subject: str, language: str = "", paragraph_number: in
     1. the script is to be returned as a string with the specified number of paragraphs.
     2. do not under any circumstance reference this prompt in your response.
     3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
-    4. you must not include any type of markdown or formatting in the script, never use a title.
-    5. only return the raw content of the script.
-    6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
+    4. you must not include any type of markdown or formatting in the script, never use a title.
+    5. only return the raw content of the script.
+    6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
     7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
     8. respond in the same language as the video subject.
```
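The llm.py changes above replace blind attribute access on provider responses with explicit validation: empty and wrong-typed responses now raise with a descriptive message instead of crashing on an opaque `AttributeError`. The guard pattern, sketched generically (the function name and the `SimpleNamespace` stand-in for a response object are illustrative, not part of the commit):

```python
from types import SimpleNamespace


def validated_content(response, expected_type, provider: str) -> str:
    """Extract message content only after checking the response is present
    and of the type the SDK documents, raising a clear error otherwise."""
    if not response:
        raise Exception(f"[{provider}] returned an empty response")
    if not isinstance(response, expected_type):
        raise Exception(f"[{provider}] returned an invalid response: \"{response}\"")
    # Mirrors the chat-completion shape: choices[0].message.content
    return response.choices[0].message.content.replace("\n", "")
```

In the real code `expected_type` is `openai.types.chat.ChatCompletion` for the OpenAI-compatible providers and dashscope's `GenerationResponse` for qwen.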

app/services/subtitle.py (+8 −2)

```diff
@@ -1,4 +1,5 @@
 import json
+import os.path
 import re
 
 from faster_whisper import WhisperModel
@@ -17,8 +18,13 @@
 def create(audio_file, subtitle_file: str = ""):
     global model
     if not model:
-        logger.info(f"loading model: {model_size}, device: {device}, compute_type: {compute_type}")
-        model = WhisperModel(model_size_or_path=model_size,
+        model_path = f"{utils.root_dir()}/models/whisper-{model_size}"
+        model_bin_file = f"{model_path}/model.bin"
+        if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
+            model_path = model_size
+
+        logger.info(f"loading model: {model_path}, device: {device}, compute_type: {compute_type}")
+        model = WhisperModel(model_size_or_path=model_path,
                              device=device,
                              compute_type=compute_type)
```
