Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -22,10 +22,9 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
|
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
25 |
-
os.system("cd emova_speech_tokenizer && pip install -e .")
|
26 |
|
27 |
-
from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
28 |
-
from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
29 |
|
30 |
####################
|
31 |
# S2U
|
@@ -77,6 +76,7 @@ mode2func = dict(
|
|
77 |
|
78 |
##########################################
|
79 |
# LLM part
|
|
|
80 |
##########################################
|
81 |
import torch
|
82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
@@ -236,7 +236,7 @@ def http_bot(state, temperature, top_p, max_new_tokens, speaker):
|
|
236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
237 |
inputs.to(model.device)
|
238 |
if len(all_images) > 0:
|
239 |
-
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
240 |
|
241 |
# Process hyperparameters
|
242 |
temperature = float(pload.get("temperature", 1.0))
|
@@ -361,7 +361,7 @@ For an optimal experience, please use desktop computers for this demo, as mobile
|
|
361 |
|
362 |
learn_more_markdown = ("""
|
363 |
## License
|
364 |
-
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/
|
365 |
|
366 |
## Acknowledgement
|
367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|
|
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
|
|
25 |
|
26 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
27 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
28 |
|
29 |
####################
|
30 |
# S2U
|
|
|
76 |
|
77 |
##########################################
|
78 |
# LLM part
|
79 |
+
# TODO: 1) change model 2) change arguments
|
80 |
##########################################
|
81 |
import torch
|
82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
|
|
236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
237 |
inputs.to(model.device)
|
238 |
if len(all_images) > 0:
|
239 |
+
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype) # TODO
|
240 |
|
241 |
# Process hyperparameters
|
242 |
temperature = float(pload.get("temperature", 1.0))
|
|
|
361 |
|
362 |
learn_more_markdown = ("""
|
363 |
## License
|
364 |
+
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/QwenLM/Qwen/blob/main/LICENSE) of Qwen and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
365 |
|
366 |
## Acknowledgement
|
367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|