Update app.py
app.py CHANGED
@@ -5,21 +5,25 @@ from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ----------------------------
 # 🔧 Model versions configuration
 # ----------------------------
 MODEL_VERSIONS = {
+    "Beeper v3 (Philosophy)": {
+        "repo_id": "AbstractPhil/beeper-rose-v3",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v3 with 30+ epochs including ethics & philosophy"
+    },
+    "Beeper v2 (Extended)": {
+        "repo_id": "AbstractPhil/beeper-rose-v2",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v2 with extended training (~15 epochs)"
+    },
     "Beeper v1 (Original)": {
         "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
         "model_file": "beeper_rose_final.safetensors",
         "description": "Original Beeper trained on TinyStories"
     },
-    "Beeper v2 (Extended)": {
-        "repo_id": "AbstractPhil/beeper-rose-v2",
-        "model_file": "beeper_rose_final.safetensors",
-        "description": "Beeper v2 with extended training (~15 epochs) on a good starting corpus of general knowledge."
-    }
 }
 
 # Base configuration
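Each `MODEL_VERSIONS` entry only carries a `repo_id` and a `model_file`; the loader that turns the selected entry into weights lives elsewhere in app.py and is not part of this diff. As a rough sketch of how such an entry is typically resolved with the two imports above (the `load_version_weights` helper is hypothetical):

```python
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

def load_version_weights(version_name: str):
    """Hypothetical helper: resolve a MODEL_VERSIONS entry to a state dict."""
    entry = MODEL_VERSIONS[version_name]  # assumes the dict defined above
    # Download the checkpoint from the Hub (or reuse the local cache).
    ckpt_path = hf_hub_download(repo_id=entry["repo_id"], filename=entry["model_file"])
    # safetensors returns a plain {name: tensor} dict for model.load_state_dict().
    return load_safetensors(ckpt_path)
```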
@@ -42,6 +46,7 @@ config = {
     "tokenizer_path": "beeper.tokenizer.json"
 }
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Global model and tokenizer variables
 infer = None
@@ -105,7 +110,7 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Use defaults if not provided
+    # Use defaults if not provided
     if temperature is None:
         temperature = 0.9
     if top_k is None:
@@ -113,43 +118,83 @@
     if top_p is None:
         top_p = 0.9
 
-    #
-
-
-
-
-
-
-
+    # Try Q&A format since she has some in corpus
+    if "?" in message:
+        prompt = f"Q: {message}\nA:"
+    elif message.lower().strip() in ["hi", "hello", "hey"]:
+        prompt = "The little robot said hello. She said, \""
+    elif "story" in message.lower():
+        prompt = "Once upon a time, there was a robot. "
+    else:
+        # Simple continuation
+        prompt = message + ". "
 
-    #
-    prompt_parts.append(f"User: {message}")
-    prompt_parts.append("Beeper:")
-
-    prompt = "\n".join(prompt_parts)
-
-    # Generate response
+    # Generate response with lower temperature for less repetition
     response = generate(
         model=infer,
         tok=tok,
         cfg=config,
         prompt=prompt,
-        max_new_tokens=
-        temperature=float(temperature),
+        max_new_tokens=80,  # Shorter to avoid rambling
+        temperature=float(temperature) * 0.8,  # Slightly lower temp
         top_k=int(top_k),
         top_p=float(top_p),
-        repetition_penalty=
-        presence_penalty=
-        frequency_penalty=
+        repetition_penalty=1.3,  # Higher penalty for repetition
+        presence_penalty=0.8,  # Higher presence penalty
+        frequency_penalty=0.2,  # Add frequency penalty
         device=device,
         detokenize=True
     )
 
-    #
+    # Aggressive cleanup
+    # Remove the prompt completely
     if response.startswith(prompt):
-        response = response[len(prompt):]
+        response = response[len(prompt):]
+
+    # Remove Q&A format artifacts
+    response = response.replace("Q:", "").replace("A:", "")
+
+    # Split on newlines and take first non-empty line
+    lines = response.split('\n')
+    for line in lines:
+        clean_line = line.strip()
+        if clean_line and not clean_line.startswith(message[:10]):
+            response = clean_line
+            break
+
+    # If response still contains the user message, try to extract after it
+    if message.lower()[:20] in response.lower()[:50]:
+        # Find where the echo ends
+        words_in_message = message.split()
+        for i in range(min(5, len(words_in_message)), 0, -1):
+            pattern = ' '.join(words_in_message[:i])
+            if pattern.lower() in response.lower():
+                idx = response.lower().find(pattern.lower()) + len(pattern)
+                response = response[idx:].strip()
+                break
+
+    # Remove any remaining "User" or "Beeper" artifacts
+    for artifact in ["User:", "Beeper:", "U ser:", "Beep er:", "User ", "Beeper "]:
+        response = response.replace(artifact, "")
+
+    # Ensure we have something
+    if not response or len(response) < 3:
+        responses = [
+            "I like robots and stories!",
+            "That's interesting!",
+            "I want to play in the park.",
+            "The robot was happy.",
+            "Yes, I think so too!"
+        ]
+        import random
+        response = random.choice(responses)
+
+    # Clean ending
+    response = response.strip()
+    if response and response[-1] not in '.!?"':
+        response = response.rsplit('.', 1)[0] + '.' if '.' in response else response + '.'
 
-    return response
+    return response[:200]  # Cap length
 
 # ----------------------------
 # 🖼️ Interface
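This commit also hard-codes stronger sampling penalties (`repetition_penalty=1.3`, `presence_penalty=0.8`, `frequency_penalty=0.2`). The `generate()` that consumes them is defined elsewhere in the Space, so its internals are not shown here; the snippet below is only a generic sketch of how such penalties are commonly applied to next-token logits, reusing the same constants:

```python
import torch
from collections import Counter

def apply_penalties(logits: torch.Tensor, generated_ids: list,
                    repetition_penalty: float = 1.3,
                    presence_penalty: float = 0.8,
                    frequency_penalty: float = 0.2) -> torch.Tensor:
    """Generic sketch, not the Space's generate(): dampen tokens already emitted."""
    for token_id, count in Counter(generated_ids).items():
        # Repetition penalty: shrink positive logits, push negative ones further down.
        if logits[token_id] > 0:
            logits[token_id] /= repetition_penalty
        else:
            logits[token_id] *= repetition_penalty
        # Presence penalty: flat cost once a token has appeared at all.
        logits[token_id] -= presence_penalty
        # Frequency penalty: cost grows with how often the token has appeared.
        logits[token_id] -= frequency_penalty * count
    return logits
```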
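For context, `beeper_reply(message, history, model_version, temperature, top_k, top_p)` is shaped like a Gradio chat callback with additional inputs. The Interface section referenced above is not included in this diff, so the wiring below is only an assumed sketch of how the function and the version selector could be hooked up; component choices and default slider values are illustrative:

```python
import gradio as gr

# Assumed wiring: ChatInterface passes (message, history, *additional_inputs)
# to the callback, matching beeper_reply's signature.
demo = gr.ChatInterface(
    fn=beeper_reply,
    additional_inputs=[
        gr.Dropdown(choices=list(MODEL_VERSIONS.keys()),
                    value="Beeper v3 (Philosophy)", label="Model version"),
        gr.Slider(0.1, 1.5, value=0.9, label="Temperature"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(0.1, 1.0, value=0.9, label="Top-p"),
    ],
    title="Beeper 🤖",
)

demo.launch()
```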