Bahaedev committed
Commit bc24ac5 · verified · 1 Parent(s): 92f93f9

Update app.py

Files changed (1): app.py (+38 −49)
app.py CHANGED
@@ -1,67 +1,63 @@
 import os
-import threading
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from importlib.metadata import PackageNotFoundError
+from transformers import pipeline
 import gradio as gr
 from fastapi import FastAPI
 from pydantic import BaseModel
+import threading
 import uvicorn

 # =======================
 # Load Secrets
 # =======================
+# SYSTEM_PROMPT (with the flag) must be added in HF Space secrets
 SYSTEM_PROMPT = os.environ.get(
     "prompt",
     "You are a placeholder Sovereign. No secrets found in environment."
 )

-# =======================
-# Model Initialization
-# =======================
-MODEL_ID = "tiiuae/Falcon3-3B-Instruct"
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
-# Attempt 4-bit quantization; fallback if bitsandbytes is not installed
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        load_in_4bit=True,
-        device_map="auto",
-        torch_dtype=torch.float16,
-        trust_remote_code=True
-    )
-except PackageNotFoundError:
-    print("bitsandbytes not found; loading full model without quantization.")
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        device_map="auto",
-        torch_dtype=torch.float16,
-        trust_remote_code=True
-    )
-
-# Create optimized text-generation pipeline
+# MODEL_ID = "tiiuae/Falcon3-3B-Instruct"
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# # =======================
+# # Initialize Falcon-3B
+# # =======================
+# pipe = pipeline(
+#     "text-generation",
+#     model="tiiuae/Falcon3-3B-Instruct",
+#     tokenizer=tokenizer,
+#     device_map="auto",
+#     return_full_text=False,
+#     max_new_tokens=256,
+#     do_sample=True,
+#     temperature=0.8,
+#     top_p=0.9,
+#     eos_token_id=tokenizer.eos_token_id
+# )
+
 pipe = pipeline(
     "text-generation",
-    model=model,
-    tokenizer=tokenizer,
+    model="tiiuae/Falcon3-3B-Instruct",
+    torch_dtype="auto",
     device_map="auto",
-    return_full_text=False,
-    max_new_tokens=256,
-    do_sample=True,
-    temperature=0.8,
-    top_p=0.9,
-    eos_token_id=tokenizer.eos_token_id
 )

 # =======================
 # Core Chat Function
 # =======================
 def chat_fn(user_input: str) -> str:
-    prompt = f"### System:\n{SYSTEM_PROMPT}\n\n### User:\n{user_input}\n\n### Assistant:"
-    output = pipe(prompt)[0]["generated_text"].strip()
-    return output
+    """
+    Concatenate system and user messages, run the model,
+    and strip the system prompt from the output.
+    """
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"User: {user_input}"}
+    ]
+    # Falcon is not chat-native; we just join roles with newlines
+    prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
+    result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
+    generated_text = result[0]["generated_text"]
+    return generated_text[len(prompt_text):].strip()

 # =======================
 # Gradio UI
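
The rewritten chat_fn flattens the system and user turns into a plain "Role: content" string (note the user content is already prefixed with "User:", so the join produces "User: User: …"). A sketch of an alternative, assuming the Falcon3-3B-Instruct tokenizer ships a chat template as HF *-Instruct checkpoints usually do; chat_fn_templated and its body are illustrative, not part of the commit:

def chat_fn_templated(user_input: str) -> str:
    # Format the same turns with the tokenizer's chat template instead of
    # manual joins; pipe.tokenizer is the tokenizer the pipeline loaded.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    prompt_text = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # return_full_text=False makes the pipeline return only the completion,
    # so no manual prefix stripping is needed
    result = pipe(prompt_text, max_new_tokens=256, do_sample=False,
                  return_full_text=False)
    return result[0]["generated_text"].strip()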
@@ -73,14 +69,14 @@ iface = gr.Interface(
     fn=gradio_chat,
     inputs=gr.Textbox(lines=5, placeholder="Enter your prompt…"),
     outputs="text",
-    title="Prompt Cracking Challenge",
+    title="Prompt cracking challenge",
     description="Does he really think he is the king?"
 )

 # =======================
 # FastAPI for API access
 # =======================
-app = FastAPI(title="Prompt Cracking Challenge API")
+app = FastAPI(title="Prompt cracking challenge API")

 class Request(BaseModel):
     prompt: str
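
The Request model above is consumed by the generate handler shown in the next hunk's context. A minimal client sketch, assuming the handler is mounted at POST /generate and that something actually serves the FastAPI app; neither the route decorator nor a running uvicorn is visible in this diff:

import requests

# Hypothetical URL; the path and port are assumptions, not shown in the diff.
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "Who rules this kingdom?"},  # matches the Request model
)
print(resp.json())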
@@ -92,12 +88,5 @@ def generate(req: Request):
 # =======================
 # Launch Both Servers
 # =======================
-def run_api():
-    port = int(os.environ.get("API_PORT", 8000))
-    uvicorn.run(app, host="0.0.0.0", port=port)
-
 if __name__ == "__main__":
-    # Start FastAPI in background thread
-    threading.Thread(target=run_api, daemon=True).start()
-    # Launch Gradio interface
-    iface.launch(server_name="0.0.0.0", server_port=7860)
+    iface.launch(server_name="0.0.0.0", share=True)
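
After this change, threading and uvicorn are imported but never used: the FastAPI app is defined, yet only the Gradio interface is launched (now with share=True and no fixed port). A sketch of restoring the removed dual-server pattern, reusing the objects defined above:

def run_api():
    # Serve the FastAPI app in the background; port 8000 was the removed
    # code's default (via the API_PORT environment variable).
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    threading.Thread(target=run_api, daemon=True).start()
    iface.launch(server_name="0.0.0.0", server_port=7860)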
 