Error launching the model worker.

#16
by aadeshUnyde - opened

Hello,

I am trying to run this on an AWS EC2 instance. It has a Tesla T4 GPU (16GB VRAM).
The first two commands run fine, but when I run the command to launch the model worker it doesn't work.

Error log:
Traceback (most recent call last):
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1863, in _get_module
return importlib.import_module("." + module_name, self.name)
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/importlib/init.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "", line 1050, in _gcd_import
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/modeling_utils.py", line 50, in
from .integrations.flash_attention import flash_attention_forward
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/integrations/flash_attention.py", line 5, in
from ..modeling_flash_attention_utils import _flash_attention_forward
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/modeling_flash_attention_utils.py", line 30, in
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/flash_attn/init.py", line 3, in
from flash_attn.flash_attn_interface import (
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/flash_attn/flash_attn_interface.py", line 15, in
import flash_attn_2_cuda as flash_attn_gpu
ImportError: libcudart.so.11.0: cannot open shared object file: No such file or directory

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1863, in _get_module
return importlib.import_module("." + module_name, self.name)
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/importlib/init.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "", line 1050, in _gcd_import
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/integrations/integration_utils.py", line 36, in
from .. import PreTrainedModel, TFPreTrainedModel
File "", line 1075, in _handle_fromlist
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1851, in getattr
module = self._get_module(self._class_to_module[name])
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1865, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
libcudart.so.11.0: cannot open shared object file: No such file or directory

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1863, in _get_module
return importlib.import_module("." + module_name, self.name)
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/importlib/init.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "", line 1050, in _gcd_import
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/trainer.py", line 42, in
from .integrations import (
File "", line 1075, in _handle_fromlist
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1851, in getattr
module = self._get_module(self._class_to_module[name])
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1865, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
libcudart.so.11.0: cannot open shared object file: No such file or directory

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/ubuntu/LLaMA-Omni/omni_speech/serve/model_worker.py", line 23, in
from omni_speech.utils import (build_logger, server_error_msg,
File "/home/ubuntu/LLaMA-Omni/omni_speech/utils.py", line 177, in
def safe_save_model_for_hf_trainer(trainer: transformers.Trainer,
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1851, in getattr
module = self._get_module(self._class_to_module[name])
File "/home/ubuntu/miniconda3/envs/llama-omni/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1865, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
libcudart.so.11.0: cannot open shared object file: No such file or directory

CUDA version : 12.2
python 3.10.16

My environment :
accelerate==0.33.0
aiofiles==23.2.1
aiohappyeyeballs==2.4.6
aiohttp==3.11.12
aiosignal==1.3.2
annotated-types==0.7.0
antlr4-python3-runtime==4.8
anyio==4.8.0
asgiref==3.8.1
async-timeout==4.0.3
bitarray==3.1.1
bitsandbytes==0.43.1
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
contourpy==1.3.1
cramjam==2.9.1
cycler==0.12.1
Cython==3.0.12
dataclasses-json==0.6.7
Django==5.1.6
einops==0.6.1
einops-exts==0.0.4
exceptiongroup==1.2.2
exllamav2==0.2.8
-e git+https://github.com/pytorch/fairseq@ecbf110e1eb43861214b05fa001eff584954f65a#egg=fairseq
fastapi==0.112.4
fastparquet==2024.11.0
ffmpy==0.5.0
filelock==3.17.0
flash-attn==2.7.4.post1
fonttools==4.56.0
frozenlist==1.5.0
fsspec==2025.2.0
gradio==4.43.0
gradio_client==1.3.0
greenlet==3.1.1
gTTS==2.5.4
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
httpx-sse==0.4.0
huggingface-hub==0.29.2
hydra-core==1.0.7
idna==3.10
importlib_resources==6.5.2
Jinja2==3.1.6
joblib==1.4.2
jsonpatch==1.33
kiwisolver==1.4.8
langchain==0.3.19
langchain-community==0.3.17
langchain-core==0.3.36
langchain-ollama==0.2.3
langchain-text-splitters==0.3.6
langsmith==0.3.8
latex2mathml==3.77.0
-e git+https://github.com/ictnlp/LLaMA-Omni@544d0ff3de8817fdcbc5192941a11cf4a72cbf2b#egg=llama_omni
llvmlite==0.44.0
lxml==5.3.1
markdown-it-py==3.0.0
markdown2==2.5.3
MarkupSafe==2.1.5
marshmallow==3.26.1
matplotlib==3.10.1
mdurl==0.1.2
more-itertools==10.6.0
mpmath==1.3.0
multidict==6.1.0
mypy-extensions==1.0.0
networkx==3.4.2
ninja==1.11.1.3
numba==0.61.0
numpy==1.26.4
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-cusparselt-cu12==0.6.2
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
ollama==0.4.7
omegaconf==2.0.6
openai-whisper==20240930
orjson==3.10.15
packaging==24.2
pandas==2.2.3
peft==0.11.1
pillow==10.4.0
portalocker==3.1.1
propcache==0.2.1
psutil==7.0.0
pycparser==2.22
pydantic==2.10.6
pydantic-settings==2.7.1
pydantic_core==2.27.2
pydub==0.25.1
Pygments==2.19.1
pyparsing==3.2.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.20
pytz==2025.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.9.4
ruff==0.9.9
sacrebleu==2.5.1
safetensors==0.5.3
scikit-learn==1.2.2
scipy==1.15.2
semantic-version==2.10.0
sentencepiece==0.1.99
shellingham==1.5.4
shortuuid==1.0.13
six==1.17.0
sniffio==1.3.1
soundfile==0.13.1
SQLAlchemy==2.0.38
sqlparse==0.5.3
starlette==0.38.6
svgwrite==1.4.3
sympy==1.13.1
tabulate==0.9.0
tenacity==9.0.0
threadpoolctl==3.5.0
tiktoken==0.9.0
timm==0.6.13
tokenizers==0.21.0
tomlkit==0.12.0
torch==2.6.0
torchaudio==2.1.2
torchvision==0.16.2
tqdm==4.67.1
transformers==4.49.0
triton==3.2.0
typer==0.15.2
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2025.1
urllib3==2.3.0
uvicorn==0.34.0
wavedrom==2.0.3.post3
websockets==12.0
yarl==1.18.3
zstandard==0.23.0

You need to confirm your account before you can post a new comment.

Sign up or log in to comment