Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -22,10 +22,9 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
|
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
25 |
-
os.system("cd emova_speech_tokenizer && pip install -e .")
|
26 |
|
27 |
-
from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
28 |
-
from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
29 |
|
30 |
####################
|
31 |
# S2U
|
@@ -77,6 +76,7 @@ mode2func = dict(
|
|
77 |
|
78 |
##########################################
|
79 |
# LLM part
|
|
|
80 |
##########################################
|
81 |
import torch
|
82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
@@ -236,7 +236,7 @@ def http_bot(state, temperature, top_p, max_new_tokens, speaker):
|
|
236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
237 |
inputs.to(model.device)
|
238 |
if len(all_images) > 0:
|
239 |
-
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
240 |
|
241 |
# Process hyperparameters
|
242 |
temperature = float(pload.get("temperature", 1.0))
|
@@ -361,7 +361,7 @@ For an optimal experience, please use desktop computers for this demo, as mobile
|
|
361 |
|
362 |
learn_more_markdown = ("""
|
363 |
## License
|
364 |
-
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/
|
365 |
|
366 |
## Acknowledgement
|
367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|
|
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
|
|
25 |
|
26 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
27 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
28 |
|
29 |
####################
|
30 |
# S2U
|
|
|
76 |
|
77 |
##########################################
|
78 |
# LLM part
|
79 |
+
# TODO: 1) change model 2) change arguments
|
80 |
##########################################
|
81 |
import torch
|
82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
|
|
236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
237 |
inputs.to(model.device)
|
238 |
if len(all_images) > 0:
|
239 |
+
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype) # TODO
|
240 |
|
241 |
# Process hyperparameters
|
242 |
temperature = float(pload.get("temperature", 1.0))
|
|
|
361 |
|
362 |
learn_more_markdown = ("""
|
363 |
## License
|
364 |
+
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/QwenLM/Qwen/blob/main/LICENSE) of Qwen and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
365 |
|
366 |
## Acknowledgement
|
367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|