Update app.py
app.py CHANGED
@@ -5,21 +5,25 @@ from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ----------------------------
 # 🔧 Model versions configuration
 # ----------------------------
 MODEL_VERSIONS = {
+    "Beeper v3 (Philosophy)": {
+        "repo_id": "AbstractPhil/beeper-rose-v3",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v3 with 30+ epochs including ethics & philosophy"
+    },
+    "Beeper v2 (Extended)": {
+        "repo_id": "AbstractPhil/beeper-rose-v2",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v2 with extended training (~15 epochs)"
+    },
     "Beeper v1 (Original)": {
         "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
         "model_file": "beeper_rose_final.safetensors",
         "description": "Original Beeper trained on TinyStories"
     },
-    "Beeper v2 (Extended)": {
-        "repo_id": "AbstractPhil/beeper-rose-v2",
-        "model_file": "beeper_rose_final.safetensors",
-        "description": "Beeper v2 with extended training (~15 epochs) on a good starting corpus of general knowledge."
-    }
 }
 
 # Base configuration
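Each `MODEL_VERSIONS` entry only carries a `repo_id` and a `model_file`; the loader that turns the selected entry into weights lives elsewhere in app.py and is not part of this diff. As a rough sketch of how such an entry is typically resolved with the two imports above (the `load_version_weights` helper is hypothetical):

```python
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

def load_version_weights(version_name: str):
    """Hypothetical helper: resolve a MODEL_VERSIONS entry to a state dict."""
    entry = MODEL_VERSIONS[version_name]  # assumes the dict defined above
    # Download the checkpoint from the Hub (or reuse the local cache).
    ckpt_path = hf_hub_download(repo_id=entry["repo_id"], filename=entry["model_file"])
    # safetensors returns a plain {name: tensor} dict for model.load_state_dict().
    return load_safetensors(ckpt_path)
```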
@@ -42,6 +46,7 @@ config = {
     "tokenizer_path": "beeper.tokenizer.json"
 }
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Global model and tokenizer variables
 infer = None
@@ -105,7 +110,7 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Use defaults if not provided
+    # Use defaults if not provided
     if temperature is None:
         temperature = 0.9
     if top_k is None:
@@ -113,43 +118,83 @@
     if top_p is None:
         top_p = 0.9
 
-    #
-
-
-
-
-
-
-
+    # Try Q&A format since she has some in corpus
+    if "?" in message:
+        prompt = f"Q: {message}\nA:"
+    elif message.lower().strip() in ["hi", "hello", "hey"]:
+        prompt = "The little robot said hello. She said, \""
+    elif "story" in message.lower():
+        prompt = "Once upon a time, there was a robot. "
+    else:
+        # Simple continuation
+        prompt = message + ". "
 
-    #
-    prompt_parts.append(f"User: {message}")
-    prompt_parts.append("Beeper:")
-
-    prompt = "\n".join(prompt_parts)
-
-    # Generate response
+    # Generate response with lower temperature for less repetition
     response = generate(
         model=infer,
         tok=tok,
         cfg=config,
         prompt=prompt,
-        max_new_tokens=
-        temperature=float(temperature),
+        max_new_tokens=80,  # Shorter to avoid rambling
+        temperature=float(temperature) * 0.8,  # Slightly lower temp
         top_k=int(top_k),
         top_p=float(top_p),
-        repetition_penalty=
-        presence_penalty=
-        frequency_penalty=
+        repetition_penalty=1.3,  # Higher penalty for repetition
+        presence_penalty=0.8,  # Higher presence penalty
+        frequency_penalty=0.2,  # Add frequency penalty
         device=device,
         detokenize=True
     )
 
-    #
+    # Aggressive cleanup
+    # Remove the prompt completely
     if response.startswith(prompt):
-        response = response[len(prompt):]
+        response = response[len(prompt):]
+
+    # Remove Q&A format artifacts
+    response = response.replace("Q:", "").replace("A:", "")
+
+    # Split on newlines and take first non-empty line
+    lines = response.split('\n')
+    for line in lines:
+        clean_line = line.strip()
+        if clean_line and not clean_line.startswith(message[:10]):
+            response = clean_line
+            break
+
+    # If response still contains the user message, try to extract after it
+    if message.lower()[:20] in response.lower()[:50]:
+        # Find where the echo ends
+        words_in_message = message.split()
+        for i in range(min(5, len(words_in_message)), 0, -1):
+            pattern = ' '.join(words_in_message[:i])
+            if pattern.lower() in response.lower():
+                idx = response.lower().find(pattern.lower()) + len(pattern)
+                response = response[idx:].strip()
+                break
+
+    # Remove any remaining "User" or "Beeper" artifacts
+    for artifact in ["User:", "Beeper:", "U ser:", "Beep er:", "User ", "Beeper "]:
+        response = response.replace(artifact, "")
+
+    # Ensure we have something
+    if not response or len(response) < 3:
+        responses = [
+            "I like robots and stories!",
+            "That's interesting!",
+            "I want to play in the park.",
+            "The robot was happy.",
+            "Yes, I think so too!"
+        ]
+        import random
+        response = random.choice(responses)
+
+    # Clean ending
+    response = response.strip()
+    if response and response[-1] not in '.!?"':
+        response = response.rsplit('.', 1)[0] + '.' if '.' in response else response + '.'
 
-    return response
+    return response[:200]  # Cap length
 
 # ----------------------------
 # 🖼️ Interface
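This commit also hard-codes stronger sampling penalties (`repetition_penalty=1.3`, `presence_penalty=0.8`, `frequency_penalty=0.2`). The `generate()` that consumes them is defined elsewhere in the Space, so its internals are not shown here; the snippet below is only a generic sketch of how such penalties are commonly applied to next-token logits, reusing the same constants:

```python
import torch
from collections import Counter

def apply_penalties(logits: torch.Tensor, generated_ids: list,
                    repetition_penalty: float = 1.3,
                    presence_penalty: float = 0.8,
                    frequency_penalty: float = 0.2) -> torch.Tensor:
    """Generic sketch, not the Space's generate(): dampen tokens already emitted."""
    for token_id, count in Counter(generated_ids).items():
        # Repetition penalty: shrink positive logits, push negative ones further down.
        if logits[token_id] > 0:
            logits[token_id] /= repetition_penalty
        else:
            logits[token_id] *= repetition_penalty
        # Presence penalty: flat cost once a token has appeared at all.
        logits[token_id] -= presence_penalty
        # Frequency penalty: cost grows with how often the token has appeared.
        logits[token_id] -= frequency_penalty * count
    return logits
```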
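For context, `beeper_reply(message, history, model_version, temperature, top_k, top_p)` is shaped like a Gradio chat callback with additional inputs. The Interface section referenced above is not included in this diff, so the wiring below is only an assumed sketch of how the function and the version selector could be hooked up; component choices and default slider values are illustrative:

```python
import gradio as gr

# Assumed wiring: ChatInterface passes (message, history, *additional_inputs)
# to the callback, matching beeper_reply's signature.
demo = gr.ChatInterface(
    fn=beeper_reply,
    additional_inputs=[
        gr.Dropdown(choices=list(MODEL_VERSIONS.keys()),
                    value="Beeper v3 (Philosophy)", label="Model version"),
        gr.Slider(0.1, 1.5, value=0.9, label="Temperature"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(0.1, 1.0, value=0.9, label="Top-p"),
    ],
    title="Beeper 🤖",
)

demo.launch()
```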