#!/bin/bash
# converted from windows-setup.bat by github copilot

# Pick a Python interpreter: many distros only ship `python3`, not `python`.
if command -v python3 >/dev/null 2>&1; then
  py=python3
else
  py=python
fi

# check if "venv" subdirectory exists, if not, create one
if [ ! -d "venv" ]; then
  "$py" -m venv venv
else
  # -r: keep backslashes in the answer literal (SC2162)
  read -r -p "venv directory already exists. Looking to upgrade/reinstall exllama? (will reinstall python venv) (y/n) " reinst
  if [ "$reinst" = "y" ]; then
    rm -rf venv
    "$py" -m venv venv
  else
    exit
  fi
fi
# ask if the user has git and wget installed (both are needed for the download step)
# -r: don't interpret backslashes in the reply (SC2162)
read -r -p "Do you have git and wget installed? (y/n) " gitwget
if [ "$gitwget" = "y" ]; then
  echo "Setting up environment"
else
  echo "Please install git and wget from your distro's package manager before running this script."
  echo "Example for Debian-based: sudo apt-get install git wget"
  echo "Example for Arch-based: sudo pacman -S git wget"
  read -r -p "Press enter to continue"
  exit
fi
# ask which branch of exllamav2 to build; anything else is rejected up front
read -r -p "Want to build stable or dev version of exllamav2? (stable, dev): " exllamav2_version
if [ "$exllamav2_version" != "stable" ] && [ "$exllamav2_version" != "dev" ]; then
  echo "Invalid version of exllama. Please enter stable or dev."
  read -r -p "Press enter to continue"
  exit
fi
# if CUDA version 12 install pytorch for 12.1, else if CUDA 11 install pytorch for 11.8. If ROCm, install pytorch for ROCm 5.7
read -r -p "Please enter your GPU compute version, CUDA 11/12 or AMD ROCm (11, 12, rocm): " pytorch_version
# ask to install flash attention; validated here so a bad answer aborts before any install work
echo "Flash attention is a feature that could fix overflow issues on some more broken models, however, it will increase install time by a few hours."
read -r -p "Would you like to install flash-attention? (rarely needed and optional) (y/n) " flash_attention
if [ "$flash_attention" != "y" ] && [ "$flash_attention" != "n" ]; then
  echo "Invalid input. Please enter y or n."
  read -r -p "Press enter to continue"
  exit
fi
# install the PyTorch build matching the chosen compute backend;
# CUDA 12.1 is the default wheel on PyPI so it needs no --index-url
if [ "$pytorch_version" = "11" ]; then
  echo "Installing PyTorch for CUDA 11.8"
  venv/bin/python -m pip install torch --index-url https://download.pytorch.org/whl/cu118 --upgrade
elif [ "$pytorch_version" = "12" ]; then
  echo "Installing PyTorch for CUDA 12.1"
  venv/bin/python -m pip install torch
elif [ "$pytorch_version" = "rocm" ]; then
  echo "Installing PyTorch for AMD ROCm 5.7"
  venv/bin/python -m pip install torch --index-url https://download.pytorch.org/whl/rocm5.7 --upgrade
else
  echo "Invalid compute version. Please enter 11, 12, or rocm."
  read -r -p "Press enter to continue"
  exit
fi
echo "Deleting potential conflicting files"
# -f: on a first run these don't exist yet; plain `rm` would spam
# "No such file or directory" errors for each one
rm -f convert-to-safetensors.py
rm -f download-model.py
rm -rf exllamav2
rm -f start-quant.sh
rm -f enter-venv.sh
# download stuff
echo "Downloading files"
# Single clone; the dev build just adds a branch flag.
branch_args=()
if [ "$exllamav2_version" = "dev" ]; then
  branch_args=(-b dev)
fi
if [ "$exllamav2_version" = "stable" ] || [ "$exllamav2_version" = "dev" ]; then
  git clone https://github.com/turboderp/exllamav2 "${branch_args[@]}"
fi
# Helper scripts from text-generation-webui.
for script_url in \
  https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/convert-to-safetensors.py \
  https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py
do
  wget "$script_url"
done
echo "Installing pip packages"
venv/bin/python -m pip install -r exllamav2/requirements.txt
venv/bin/python -m pip install huggingface-hub transformers accelerate
venv/bin/python -m pip install ./exllamav2
echo "Writing shell files..."
# create start-quant.sh — quoted heredoc so nothing expands at write time;
# contents are byte-identical to the echo-append version
cat > start-quant.sh <<'EOF'
#!/bin/bash
venv/bin/python exl2-quant.py
read -p "Press enter to continue"
exit
EOF
chmod +x start-quant.sh
# create enter-venv.sh
cat > enter-venv.sh <<'EOF'
#!/bin/bash
bash --init-file venv/bin/activate
EOF
chmod +x enter-venv.sh
# optional flash-attention build; $flash_attention was validated (y/n) earlier
if [ "$flash_attention" = "y" ]; then
  echo "Going to attempt to install flash attention but it isn't required."
  echo "You may close now if you'd like and continue without flash attention."
  read -r -p "Press enter to continue and install flash attention"
  echo "Get some popcorn and watch a movie, this will take a while."
  echo "Installing flash-attn..."
  venv/bin/python -m pip install git+https://github.com/Dao-AILab/flash-attention.git
fi
echo "If you use ctrl+c to stop, you may need to also use 'pkill python' to stop running scripts."
echo "Environment setup complete. run start-quant.sh to start the quantization process."
# -r: backslash-safe read (SC2162); reply is discarded, this is just a pause
read -r -p "Press enter to exit"
exit