Commit 4386cfd · Duplicate from AchyuthGamer/ramba
Files changed:
- .gitattributes +35 -0
- .gitignore +3 -0
- Dockerfile +15 -0
- README.md +12 -0
- achyuthailogo.png +0 -0
- app.py +262 -0
- requirements.txt +2 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
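These rules route common large-binary file types through Git LFS rather than regular Git storage. As a quick check of which patterns a checkout treats as LFS-managed, a small helper can parse the file above (a minimal sketch, not part of this commit; it assumes .gitattributes sits at the repository root):

    # Minimal sketch: list the patterns in .gitattributes that use the LFS filter.
    from pathlib import Path

    def lfs_patterns(path: str = '.gitattributes') -> list[str]:
        patterns = []
        for line in Path(path).read_text().splitlines():
            parts = line.split()
            # A pattern is LFS-managed when its attributes include filter=lfs.
            if parts and 'filter=lfs' in parts[1:]:
                patterns.append(parts[0])
        return patterns

    print(lfs_patterns())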
.gitignore
ADDED
@@ -0,0 +1,3 @@
+venv
+.mypy_cache
+__pycache__
Dockerfile
ADDED
@@ -0,0 +1,15 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.10
+
+COPY app.py .
+COPY requirements.txt .
+
+RUN python -m venv venv
+RUN ./venv/bin/pip install -r requirements.txt
+
+ENV H2O_WAVE_LISTEN=":7860"
+ENV H2O_WAVE_ADDRESS="http://127.0.0.1:7860"
+
+CMD ["./venv/bin/wave", "run", "app.py", "--no-reload"]
README.md
ADDED
@@ -0,0 +1,12 @@
+---
+title: h2oGPT - ChatBot
+emoji: 💻
+colorFrom: indigo
+colorTo: indigo
+sdk: docker
+pinned: false
+license: apache-2.0
+duplicated_from: AchyuthGamer/ramba
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
achyuthailogo.png
ADDED
app.py
ADDED
@@ -0,0 +1,262 @@
+from h2o_wave import main, app, Q, ui, data
+from gradio_client import Client
+import ast
+
+
+async def init_ui(q: Q) -> None:
+    q.page['meta'] = ui.meta_card(
+        box='',
+        layouts=[
+            ui.layout(breakpoint='xs', min_height='100vh', zones=[
+                ui.zone('main', size='1', direction=ui.ZoneDirection.ROW, zones=[
+                    ui.zone('sidebar', size='250px'),
+                    ui.zone('body', direction=ui.ZoneDirection.COLUMN, zones=[
+                        ui.zone('title', size='55px'),
+                        ui.zone('content', size='1'),
+                        ui.zone('footer'),
+                    ]),
+                ])
+            ])
+        ],
+        title='AchyuthGPT',
+    )
+    q.page['sidebar'] = ui.nav_card(
+        box='sidebar', color='primary', title='AchyuthGPT', subtitle='Programmed by Achyuth',
+        value=f'#{q.args["#"]}' if q.args['#'] else '#page1',
+        image='https://huggingface.co/spaces/AchyuthGamer/AchyuthGPT-v1/resolve/main/achyuthailogo.png', items=[
+            ui.nav_group('', items=[
+                ui.nav_item(name='dwave-docs', label='Wave docs', path='https://AchyuthGPT.blogspot.com/'),
+                ui.nav_item(name='Achyuth-GPT', label='Achyuth GPT', path='https://github.com/achyuth4/AchyuthGPT-llmstudio'),
+                ui.nav_item(name='fine-tune', label='LLM Studio', path='https://github.com/achyuth4/AchyuthGPT-llmstudio'),
+                ui.nav_item(name='more-models', label='More models', path='https://huggingface.co/achyuthgamer'),
+            ]),
+        ],
+        secondary_items=[
+            ui.toggle(name='dark_mode', label='Dark mode', trigger=True),
+            ui.text('<center>Developer - N.Achyuth Reddy.</center>')
+        ]
+    )
+
+    q.page['chatbot'] = ui.chatbot_card(
+        box=ui.box('content'),
+        data=data('content from_user', t='list'),
+        name='chatbot'
+    )
+    q.page['title'] = ui.section_card(
+        box='title',
+        title='',
+        subtitle='',
+        items=[
+            ui.dropdown(name='model', trigger=True, label='', value='gpt', choices=[
+                ui.choice(name='gpt', label='AchyuthGPT-1'),
+                ui.choice(name='falcon', label='AchyuthGPT-2'),
+                ui.choice(name='llma', label='AchyuthGPT-3'),
+                ui.choice(name='mpt', label='AchyuthGPT-4'),
+                ui.choice(name='lmsys', label='AchyuthGPT-5'),
+                ui.choice(name='gpt-3.5-turbo', label='AchyuthGPT-6'),
+            ]),
+            ui.button(name='clear', label='Clear', icon='Delete'),
+        ],
+    )
+
+"""
+:param load_8bit: load model in 8-bit using bitsandbytes
+:param load_4bit: load model in 4-bit using bitsandbytes
+:param load_half: load model in float16
+:param infer_devices: whether to control devices with gpu_id. If False, then spread across GPUs
+:param base_model: model HF-type name. If use --base_model to preload model, cannot unload in gradio in models tab
+:param tokenizer_base_model: tokenizer HF-type name. Usually not required, inferred from base_model.
+:param lora_weights: LORA weights path/HF link
+:param gpu_id: if infer_devices, then use gpu_id for cuda device ID, or auto mode if gpu_id != -1
+:param compile_model: Whether to compile the model
+:param use_cache: Whether to use caching in model (some models fail when multiple threads use)
+:param inference_server: Consume base_model as type of model at this address
+    Address can be text-generation-server hosting that base_model
+    e.g. python generate.py --inference_server="http://192.168.1.46:6112" --base_model=h2oai/h2ogpt-oasst1-512-12b
+    Or Address can be "openai_chat" or "openai" for OpenAI API
+    e.g. python generate.py --inference_server="openai_chat" --base_model=gpt-3.5-turbo
+    e.g. python generate.py --inference_server="openai" --base_model=text-davinci-003
+:param prompt_type: type of prompt, usually matched to fine-tuned model or plain for foundational model
+:param prompt_dict: If prompt_type=custom, then expects (some) items returned by get_prompt(..., return_dict=True)
+:param model_lock: Lock models to specific combinations, for ease of use and extending to many models
+    Only used if gradio = True
+    List of dicts, each dict has base_model, tokenizer_base_model, lora_weights, inference_server, prompt_type, and prompt_dict
+    If all models have same prompt_type, and prompt_dict, can still specify that once in CLI outside model_lock as default for dict
+    Can specify model_lock instead of those items on CLI
+    As with CLI itself, base_model can infer prompt_type and prompt_dict if in prompter.py.
+    Also, tokenizer_base_model and lora_weights are optional.
+    Also, inference_server is optional if loading model from local system.
+    All models provided will automatically appear in compare model mode
+    Model loading-unloading and related choices will be disabled. Model/lora/server adding will be disabled
+:param model_lock_columns: How many columns to show if locking models (and so showing all at once)
+    If None, then defaults to up to 3
+    if -1, then all goes into 1 row
+    Maximum value is 4 due to non-dynamic gradio rendering elements
+:param fail_if_cannot_connect: if doing model locking (e.g. with many models), fail if True. Otherwise ignore.
+    Useful when many endpoints and want to just see what works, but still have to wait for timeout.
+:param temperature: generation temperature
+:param top_p: generation top_p
+:param top_k: generation top_k
+:param num_beams: generation number of beams
+:param repetition_penalty: generation repetition penalty
+:param num_return_sequences: generation number of sequences (1 forced for chat)
+:param do_sample: generation sample
+:param max_new_tokens: generation max new tokens
+:param min_new_tokens: generation min tokens
+:param early_stopping: generation early stopping
+:param max_time: maximum time to allow for generation
+:param memory_restriction_level: 0 = no restriction to tokens or model, 1 = some restrictions on tokens, 2 = HF-like restriction, 3 = very low memory case
+:param debug: enable debug mode
+:param save_dir: directory chat data is saved to
+:param share: whether to share the gradio app with sharable URL
+:param local_files_only: whether to only use local files instead of going to HF for models
+:param resume_download: whether to resume downloads from HF for models
+:param use_auth_token: whether to use HF auth token (requires CLI did huggingface-cli login before)
+:param trust_remote_code: whether to trust any code needed for HF model
+:param offload_folder: path for spilling model onto disk
+:param src_lang: source languages to include if doing translation (None = all)
+:param tgt_lang: target languages to include if doing translation (None = all)
+:param cli: whether to use CLI (non-gradio) interface.
+:param cli_loop: whether to loop for CLI (False usually only for testing)
+:param gradio: whether to enable gradio, or to enable benchmark mode
+:param gradio_offline_level: > 0, then change fonts so full offline
+    == 1 means backend won't need internet for fonts, but front-end UI might if font not cached
+    == 2 means backend and frontend don't need internet to download any fonts.
+    Note: Some things always disabled include HF telemetry, gradio telemetry, chromadb posthog that involve uploading.
+    This option further disables google fonts for downloading, which is less intrusive than uploading,
+    but still required in air-gapped case. The fonts don't look as nice as google fonts, but ensure full offline behavior.
+    Also set --share=False to avoid sharing a gradio live link.
+:param chat: whether to enable chat mode with chat history
+:param chat_context: whether to use extra helpful context if human_bot
+:param stream_output: whether to stream output
+:param show_examples: whether to show clickable examples in gradio
+:param verbose: whether to show verbose prints
+:param h2ocolors: whether to use H2O.ai theme
+:param height: height of chat window
+:param show_lora: whether to show LORA options in UI (expert so can be hard to understand)
+:param login_mode_if_model0: set to True to load --base_model after client logs in, to be able to free GPU memory when model is swapped
+:param block_gradio_exit: whether to block gradio exit (used for testing)
+:param concurrency_count: gradio concurrency count (1 is optimal for LLMs)
+:param api_open: If False, don't let API calls skip gradio queue
+:param allow_api: whether to allow API calls at all to gradio server
+:param input_lines: how many input lines to show for chat box (>1 forces shift-enter for submit, else enter is submit)
+:param gradio_size: Overall size of text and spaces: "xsmall", "small", "medium", "large".
+    Small useful for many chatbots in model_lock mode
+:param auth: gradio auth for launcher in form [(user1, pass1), (user2, pass2), ...]
+    e.g. --auth=[('jon','password')] with no spaces
+:param max_max_time: Maximum max_time for gradio slider
+:param max_max_new_tokens: Maximum max_new_tokens for gradio slider
+:param sanitize_user_prompt: whether to remove profanity from user input (slows down input processing)
+:param sanitize_bot_response: whether to remove profanity and repeat lines from bot output (about 2x slower generation for long streaming cases due to better_profanity being slow)
+:param extra_model_options: extra models to show in list in gradio
+:param extra_lora_options: extra LORA to show in list in gradio
+:param extra_server_options: extra servers to show in list in gradio
+:param score_model: which model to score responses (None means no scoring)
+:param eval_filename: json file to use for evaluation, if None is sharegpt
+:param eval_prompts_only_num: for no gradio benchmark, if using eval_filename prompts for eval instead of examples
+:param eval_prompts_only_seed: for no gradio benchmark, seed for eval_filename sampling
+:param eval_as_output: for no gradio benchmark, whether to test eval_filename output itself
+:param langchain_mode: Data source to include. Choose "UserData" to only consume files from make_db.py.
+    WARNING: wiki_full requires extra data processing via read_wiki_full.py and requires really good workstation to generate db, unless already present.
+:param langchain_action: Mode for langchain operations on documents.
+    Query: Make query of document(s)
+    Summarize or Summarize_map_reduce: Summarize document(s) via map_reduce
+    Summarize_all: Summarize document(s) using entire document at once
+    Summarize_refine: Summarize document(s) using entire document, and try to refine before returning summary
+:param force_langchain_evaluate: Whether to force langchain LLM use even if not doing langchain, mostly for testing.
+:param user_path: user path to glob from to generate db for vector search, for 'UserData' langchain mode.
+    If already have db, any new/changed files are added automatically if path set, does not have to be same path used for prior db sources
+:param detect_user_path_changes_every_query: whether to detect if any files changed or added every similarity search (by file hashes).
+    Expensive for large number of files, so not done by default. By default only detect changes during db loading.
+:param visible_langchain_modes: dbs to generate at launch to be ready for LLM
+    Can be up to ['wiki', 'wiki_full', 'UserData', 'MyData', 'github h2oGPT', 'DriverlessAI docs']
+    But wiki_full is expensive and requires preparation
+    To allow scratch space only live in session, add 'MyData' to list
+    Default: If only want to consume local files, e.g. prepared by make_db.py, only include ['UserData']
+    FIXME: Avoid 'All' for now, not implemented
+:param visible_langchain_actions: Which actions to allow
+:param document_choice: Default document choice when taking subset of collection
+:param load_db_if_exists: Whether to load chroma db if exists or re-generate db
+:param keep_sources_in_context: Whether to keep url sources in context, not helpful usually
+:param db_type: 'faiss' for in-memory or 'chroma' or 'weaviate' for persisted on disk
+:param use_openai_embedding: Whether to use OpenAI embeddings for vector db
+:param use_openai_model: Whether to use OpenAI model for use with vector db
+:param hf_embedding_model: Which HF embedding model to use for vector db
+    Default is instructor-large with 768 parameters per embedding if have GPUs, else all-MiniLM-L6-v1 if no GPUs
+    Can also choose simpler model with 384 parameters per embedding: "sentence-transformers/all-MiniLM-L6-v2"
+    Can also choose even better embedding with 1024 parameters: 'hkunlp/instructor-xl'
+    We support automatically changing of embeddings for chroma, with a backup of db made if this is done
+:param allow_upload_to_user_data: Whether to allow file uploads to update shared vector db
+:param allow_upload_to_my_data: Whether to allow file uploads to update scratch vector db
+:param enable_url_upload: Whether to allow upload from URL
+:param enable_text_upload: Whether to allow upload of text
+:param enable_sources_list: Whether to allow list (or download for non-shared db) of list of sources for chosen db
+:param chunk: Whether to chunk data (True unless know data is already optimally chunked)
+:param chunk_size: Size of chunks, with typically top-4 passed to LLM, so needs to be in context length
+:param top_k_docs: number of chunks to give LLM
+:param reverse_docs: whether to reverse docs order so most relevant is closest to question.
+    Best choice for sufficiently smart model, and truncation occurs for oldest context, so best then too.
+    But smaller 6_9 models fail to use newest context and can get stuck on old information.
+:param auto_reduce_chunks: Whether to automatically reduce top_k_docs to fit context given prompt
+:param max_chunks: If top_k_docs=-1, maximum number of chunks to allow
+:param n_jobs: Number of processors to use when consuming documents (-1 = all, is default)
+:param enable_captions: Whether to support captions using BLIP for image files as documents, then preloads that model
+:param captions_model: Which model to use for captions.
+    captions_model: str = "Salesforce/blip-image-captioning-base", # continue capable
+    captions_model: str = "Salesforce/blip2-flan-t5-xl", # question/answer capable, 16GB state
+    captions_model: str = "Salesforce/blip2-flan-t5-xxl", # question/answer capable, 60GB state
+    Note: opt-based blip2 are not permissive license due to opt and Meta license restrictions
+:param pre_load_caption_model: Whether to preload caption model, or load after forking parallel doc loader
+    parallel loading disabled if preload and have images, to prevent deadlocking on cuda context
+    Recommended if using larger caption model
+:param caption_gpu: If support caption, then use GPU if exists
+:param enable_ocr: Whether to support OCR on images
+:return:
+"""
+
+@app('/')
+async def serve(q: Q):
+    if not q.client.initialized:
+        await init_ui(q)
+        q.client.model_client = Client('https://gpt.h2o.ai/')
+        q.client.initialized = True
+
+    # A new message arrived.
+    if q.args.chatbot:
+        # Append user message.
+        q.page['chatbot'].data += [q.args.chatbot, True]
+        # Append bot response.
+        kwargs = dict(instruction_nochat=q.args.chatbot)
+        try:
+            res = q.client.model_client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
+            bot_res = ast.literal_eval(res)['response']
+            q.page['chatbot'].data += [bot_res, False]
+        except Exception:
+            q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                text='An error occurred during prediction. Please try later or a different model.',
+                type='error',
+            ))
+    elif q.args.clear:
+        # Recreate the card.
+        q.page['chatbot'] = ui.chatbot_card(
+            box=ui.box('content'),
+            data=data('content from_user', t='list'),
+            name='chatbot'
+        )
+    elif q.args.dark_mode is not None:
+        q.page['meta'].theme = 'achyuthgpt-dark' if q.args.dark_mode else 'light'
+        q.page['sidebar'].color = 'card' if q.args.dark_mode else 'primary'
+    elif q.args.model:
+        try:
+            q.client.model_client = Client(f'https://{q.args.model}.h2o.ai/')
+            q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                text='Model changed successfully.',
+                type='success',
+            ))
+        except Exception:
+            q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                text='An error occurred while changing the model. Please try a different one.',
+                type='error',
+            ))
+
+    await q.page.save()
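The serve() handler above forwards each chat message to a remote h2oGPT Gradio endpoint and parses the string it returns with ast.literal_eval (the endpoint replies with a stringified dict rather than JSON). The same round trip can be tested outside Wave with a minimal sketch, assuming https://gpt.h2o.ai/ is reachable and still exposes the /submit_nochat_api route used above:

    # Minimal sketch of the gradio_client round trip used in serve() above.
    # Assumes https://gpt.h2o.ai/ is reachable and exposes /submit_nochat_api.
    import ast
    from gradio_client import Client

    client = Client('https://gpt.h2o.ai/')
    payload = dict(instruction_nochat='Say hello in one sentence.')
    raw = client.predict(str(payload), api_name='/submit_nochat_api')  # returns a stringified dict
    print(ast.literal_eval(raw)['response'])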
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+h2o-wave
+gradio-client