Runtime error
Update app.py
app.py CHANGED
@@ -6,9 +6,7 @@ from llama_cpp import Llama
 import configparser
 from functools import partial
 from utils.dl_utils import dl_guff_model
-import
-import tempfile
-import csv
+import threading
 
 # 定数
 DEFAULT_INI_FILE = 'settings.ini'
@@ -34,7 +32,6 @@ model_path = os.path.join("models", model_filename)
 if not os.path.exists(model_path):
     dl_guff_model("models", f"https://huggingface.co/MCZK/EZO-Common-9B-gemma-2-it-GGUF/resolve/main/{model_filename}")
 
-
 class ConfigManager:
     @staticmethod
     def load_settings(filename):
@@ -106,6 +103,9 @@ class TextGenerator:
         self.llm = None
         self.settings = None
         self.current_model = None
+        self.history = []
+        self.use_chat_format = False
+        self.model_loaded = threading.Event()
 
     @spaces.GPU(duration=120)
     def load_model(self):
@@ -113,50 +113,79 @@ class TextGenerator:
             del self.llm
             self.llm = None
 
+        self.model_loaded.clear()
         try:
             model_path = os.path.join(MODEL_DIR, self.settings['DEFAULT_GEN_MODEL'])
             n_gpu_layers = self.settings['gen_n_gpu_layers']
             self.llm = Llama(model_path=model_path, n_ctx=self.settings['gen_n_ctx'], n_gpu_layers=n_gpu_layers)
             self.current_model = 'GEN'
             print(f"GEN モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
+            self.model_loaded.set()
         except Exception as e:
             print(f"GEN モデルのロード中にエラーが発生しました: {str(e)}")
+            self.model_loaded.set()
 
     def generate_text(self, text, gen_characters, gen_token_multiplier, instruction):
         if not self.llm:
             self.load_model()
 
-        if not self.llm:
+        if not self.model_loaded.wait(timeout=30) or not self.llm:
+            return "モデルのロードに失敗しました。設定を確認してください。"
+
+        input_str = f"{instruction}\n\n生成するテキスト(目安は{gen_characters}文字):\n\n{text}"
+        return self.generate_response(input_str, gen_characters, gen_token_multiplier)
+
+    def generate_response(self, input_str, gen_characters, gen_token_multiplier):
+        if not self.model_loaded.wait(timeout=30) or not self.llm:
             return "モデルのロードに失敗しました。設定を確認してください。"
 
-        author_description = self.settings.get('gen_author_description', '')
         max_tokens = int(gen_characters * gen_token_multiplier)
 
-
-
-
-
-
+        if not self.use_chat_format:
+            try:
+                prompt = self._generate_prompt(input_str)
+                res = self.llm.create_completion(prompt=prompt, max_tokens=max_tokens)
+                res_text = res["choices"][0]["text"]
+                self.history.append({"user": input_str, "assistant": res_text})
+                return res_text
+            except Exception as e:
+                print(f"既存の形式でのレスポンス生成に失敗しました: {str(e)}")
+                print("チャット形式に切り替えます。")
+                self.use_chat_format = True
 
-
-
-
-
-
-            top_p=self.settings['gen_top_p'],
-            top_k=self.settings['gen_top_k'],
-            repeat_penalty=self.settings['gen_rep_pen'],
-        )
+        if self.use_chat_format:
+            chat_messages = [
+                {"role": "system", "content": self.settings.get('gen_author_description', '')},
+                {"role": "user", "content": input_str}
+            ]
 
-
-
-
-
-
+            try:
+                response = self.llm.create_chat_completion(
+                    messages=chat_messages,
+                    max_tokens=max_tokens,
+                    temperature=self.settings['gen_temperature'],
+                    top_p=self.settings['gen_top_p'],
+                    top_k=self.settings['gen_top_k'],
+                    repeat_penalty=self.settings['gen_rep_pen'],
+                )
+
+                res_text = response["choices"][0]["message"]["content"].strip()
+                self.history.append({"user": input_str, "assistant": res_text})
+                return res_text
+            except Exception as e:
+                print(f"チャット形式でのレスポンス生成に失敗しました: {str(e)}")
+                return "レスポンス生成中にエラーが発生しました。設定を確認してください。"
+
+    def _generate_prompt(self, input_str):
+        return f"{self.settings.get('gen_author_description', '')}\n\nUser: {input_str}\nAssistant:"
 
     def load_settings(self, filename):
         self.settings = Settings.load_from_ini(filename)
 
+    def reset(self):
+        self.history = []
+        self.use_chat_format = False
+
 # グローバル変数
 text_generator = TextGenerator()
 model_files = ModelManager.get_model_files()
@@ -273,6 +302,8 @@ def build_gradio_interface():
             input_component = gr.Textbox(label=key, value=value)
 
             input_component.change(
+                partial(ConfigManager.update_setting, 'GenerateParameters', key),
+                input_component.change(
                 partial(ConfigManager.update_setting, 'GenerateParameters', key),
                 inputs=[input_component],
                 outputs=[output]
@@ -294,4 +325,4 @@ if __name__ == "__main__":
     text_generator.load_settings(DEFAULT_INI_FILE)
 
     demo = build_gradio_interface()
-    demo.launch(share=True)
+    demo.launch(share=True)
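
For context, this commit introduces two patterns: gating generation on a threading.Event so callers wait for (or time out on) model loading, and falling back from the plain completion API to the chat-completion API when the first call fails. A minimal standalone sketch of both, assuming llama-cpp-python's Llama API; the model path, n_ctx, timeout, and class name below are illustrative placeholders, not the app's real settings:

import threading

from llama_cpp import Llama


class GeneratorSketch:
    def __init__(self):
        self.llm = None
        self.use_chat_format = False
        self.model_loaded = threading.Event()

    def load_model(self, model_path="model.gguf"):
        # Clear the event so concurrent callers block until loading resolves.
        self.model_loaded.clear()
        try:
            self.llm = Llama(model_path=model_path, n_ctx=4096)
        except Exception as e:
            print(f"load failed: {e}")
            self.llm = None
        finally:
            # Release waiters even on failure; callers re-check self.llm.
            self.model_loaded.set()

    def generate(self, prompt, max_tokens=256):
        # Wait up to 30s for load_model to finish, then verify it succeeded.
        if not self.model_loaded.wait(timeout=30) or not self.llm:
            return "model not loaded"
        if not self.use_chat_format:
            try:
                res = self.llm.create_completion(prompt=prompt, max_tokens=max_tokens)
                return res["choices"][0]["text"]
            except Exception:
                # Switch permanently to the chat API, e.g. when the GGUF
                # only works through its chat template.
                self.use_chat_format = True
        res = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return res["choices"][0]["message"]["content"].strip()

Setting the event in a finally block releases any waiter even when loading fails, which is why generate() still re-checks self.llm after the wait; the commit gets the same effect by calling model_loaded.set() in both the try and except branches of load_model.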