contenteaseAI committed
Commit ed5c12c · verified · 1 Parent(s): 8a9e8c8

Upload app.py

Files changed (1):
  1. app.py +65 -93
app.py CHANGED
@@ -4,25 +4,31 @@ import subprocess
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
-from llama_cpp_agent.chat_history import BasicChatHistory
-from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
+import logging
+import time
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
-hf_hub_download(
-    repo_id="bartowski/gemma-2-9b-it-GGUF",
-    filename="gemma-2-9b-it-Q5_K_M.gguf",
-    local_dir="./models"
-)
+repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
+filename = "Meta-Llama-3-8B-Instruct.Q8_0.gguf"
 
-hf_hub_download(
-    repo_id="bartowski/gemma-2-27b-it-GGUF",
-    filename="gemma-2-27b-it-Q5_K_M.gguf",
-    local_dir="./models"
-)
-
+try:
+    start_time = time.time()
+    logger.info("Downloading Model....")
+    hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        local_dir="./models"  # keep in sync with the "models/{model}" load path below
+    )
+    end_time = time.time()
+    logger.info(f"Download complete. Time taken: {end_time - start_time} seconds.")
+except Exception as e:
+    logger.error(f"Unable to download Model: {e}")
+    raise
 
 llm = None
 llm_model = None
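Note: hf_hub_download returns the resolved local path of the downloaded file, so capturing its return value avoids hard-coding the "models/" prefix used later when loading. A minimal sketch, assuming the same repo_id and filename as above:

    model_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./models")
    logger.info(f"Model saved to {model_path}")  # resolved path to the GGUF file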
@@ -30,16 +36,12 @@ llm_model = None
 @spaces.GPU(duration=120)
 def respond(
     message,
-    history: list[tuple[str, str]],
     model,
     system_message,
     max_tokens,
     temperature,
-    top_p,
-    top_k,
-    repeat_penalty,
 ):
-    chat_template = MessagesFormatterType.GEMMA_2
+    chat_template = MessagesFormatterType.LLAMA_3
 
     global llm
     global llm_model
@@ -48,7 +50,7 @@ def respond(
     llm = Llama(
         model_path=f"models/{model}",
         flash_attn=True,
-        n_gpu_layers=81,
+        n_gpu_layers=-1,  # -1 offloads all layers to the GPU
         n_batch=1024,
         n_ctx=8192,
     )
@@ -65,30 +67,12 @@ def respond(
 
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
     settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
     settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
 
     stream = agent.get_chat_response(
         message,
         llm_sampling_settings=settings,
-        chat_history=messages,
         returns_streaming_generator=True,
         print_output=False
     )
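The provider and agent construction between these hunks (old lines 57-64) is unchanged and elided from the diff. A sketch of what that section typically looks like with llama_cpp_agent, assuming this app follows the reference Space it was derived from:

    # Assumed shape of the elided section; names follow the llama_cpp_agent API.
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )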
@@ -98,64 +82,52 @@ def respond(
         outputs += output
         yield outputs
 
-description = """<p align="center"><a href="https://huggingface.co/spaces/gokaygokay/Gemma-2-llamacpp" target="_blank">[Reference Space]</a></p>
-<p><center>
-<p align="center">Defaults to 9B it (you can switch to other from additional inputs)</p>
-<p><center>
-<a href="https://huggingface.co/google/gemma-2-27b-it" target="_blank">[27B it Model]</a>
-<a href="https://huggingface.co/google/gemma-2-9b-it" target="_blank">[9B it Model]</a>
-<a href="https://huggingface.co/bartowski/gemma-2-27b-it-GGUF" target="_blank">[27B it Model GGUF]</a>
-<a href="https://huggingface.co/bartowski/gemma-2-9b-it-GGUF" target="_blank">[9B it Model GGUF]</a>
-</center></p>
-"""
+DESCRIPTION = '''
+<div>
+<h1 style="text-align: center;">ContenteaseAI custom trained model</h1>
+</div>
+'''
 
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Dropdown([
-                'gemma-2-9b-it-Q5_K_M.gguf',
-                'gemma-2-27b-it-Q5_K_M.gguf'
-            ],
-            value="gemma-2-9b-it-Q5_K_M.gguf",
-            label="Model"
-        ),
-        gr.Textbox(value="You are a helpful assistant.", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p",
-        ),
-        gr.Slider(
-            minimum=0,
-            maximum=100,
-            value=40,
-            step=1,
-            label="Top-k",
-        ),
-        gr.Slider(
-            minimum=0.0,
-            maximum=2.0,
-            value=1.1,
-            step=0.1,
-            label="Repetition penalty",
-        ),
-    ],
-    retry_btn="Retry",
-    undo_btn="Undo",
-    clear_btn="Clear",
-    submit_btn="Send",
-    title="Chat with Gemma 2 using llama.cpp",
-    description=description,
-    chatbot=gr.Chatbot(
-        scale=1,
-        likeable=False,
-        show_copy_button=True
-    )
-)
+LICENSE = """
+<p/>
+---
+For more information, visit our [website](https://contentease.ai).
+"""
+
+PLACEHOLDER = """
+<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">ContenteaseAI Custom AI trained model</h1>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Enter the text extracted from the PDF:</p>
+</div>
+"""
+
+css = """
+h1 {
+    text-align: center;
+    display: block;
+}
+"""
+
+# Gradio block
+chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
+
+with gr.Blocks(fill_height=True, css=css) as demo:
+    gr.Markdown(DESCRIPTION)
+
+    gr.ChatInterface(
+        fn=respond,
+        chatbot=chatbot,
+        fill_height=True,
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+        additional_inputs=[
+            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
+            gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
+        ]
+    )
+
+    gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.launch()
+    try:
+        demo.launch(show_error=True, debug=True)
+    except Exception as e:
+        logger.error(f"Error launching Gradio demo: {e}")
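Note: gr.ChatInterface invokes its fn as fn(message, history, *additional_inputs), so with history removed from respond's signature the chat history will be passed into model, and the two sliders (Temperature, Max new tokens) do not line up with (model, system_message, max_tokens, temperature). A sketch of a signature that matches the wiring above, folding the fixed model file and system prompt in as defaults (hypothetical defaults, not part of the commit):

    def respond(message, history, temperature, max_tokens,
                model=filename,  # the GGUF downloaded at the top of the file
                system_message="You are a helpful assistant."):
        ...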
 
 
 
 