Updated to use the tdc_prompts
app.py
CHANGED
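
This commit downloads tdc_prompts.json from the google/txgemma-2b-predict repo with hf_hub_download and turns up to MAX_EXAMPLES of its entries into gr.Examples rows for the demo. The loading code assumes the file is a list of objects that each carry a "prompt" key and falls back to an empty example list otherwise. A minimal sketch of that assumed shape and the example rows derived from it (the entries below are placeholders, not the real file contents):

    # Sketch only: the structure app.py assumes for tdc_prompts.json.
    # The real file contents are not part of this commit; these entries are placeholders.
    assumed_tdc_prompts = [
        {"prompt": "placeholder TDC task prompt 1"},
        {"prompt": "placeholder TDC task prompt 2"},
    ]

    # Mirrors the list comprehension added in app.py: each entry becomes one
    # Gradio example row of [prompt, max_new_tokens, temperature].
    examples_list = [
        [item.get("prompt", "Missing prompt"), 100, 0.7]
        for item in assumed_tdc_prompts[:10]
        if isinstance(item, dict) and "prompt" in item
    ]
    print(examples_list)
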
@@ -2,24 +2,33 @@
 # Make sure to add 'gradio', 'transformers', and 'torch' (or 'tensorflow'/'flax')
 # to your requirements.txt file in the Hugging Face Space repository.
 # gated model
+# Set Hugging Face token if needed (for gated models, though Llama 3.1 might not require it after initial access grant)
+from huggingface_hub import login
+
+# app.py for Hugging Face Space
+# Make sure to add 'gradio', 'transformers', 'torch' (or 'tensorflow'/'flax'),
+# and 'huggingface_hub' to your requirements.txt file in the Hugging Face Space repository.
+
 import gradio as gr
 import torch # Or tensorflow/flax depending on backend
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import hf_hub_download # Import hub download function
+import json # Import json library
+import os # Import os library for path joining
 
-#
-import os
-from huggingface_hub import login
-
+# --- hf login ---
 hf_token = os.getenv("HF_TOKEN")
 login(token=hf_token)
 
 # --- Configuration ---
 MODEL_NAME = "google/txgemma-2b-predict"
+PROMPT_FILENAME = "tdc_prompts.json"
 MODEL_CACHE = "model_cache" # Optional: define a cache directory
+MAX_EXAMPLES = 10 # Limit the number of examples loaded from the JSON
 
-# --- Load Model and Tokenizer ---
-# This might take some time the first time the space boots up
+# --- Load Model, Tokenizer, and Prompts ---
print(f"Loading model: {MODEL_NAME}...")
+tdc_prompts_data = [] # Initialize empty list for prompts
 try:
     # Check if GPU is available and use it, otherwise use CPU
     device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -30,20 +39,47 @@ try:
     print("Tokenizer loaded.")
 
     # Load the model
-    # Use torch_dtype=torch.float16 for potentially faster inference and less memory on GPU
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         cache_dir=MODEL_CACHE,
-        # torch_dtype=torch.float16 if device == "cuda" else None, # Uncomment if using GPU and want float16
         device_map="auto" # Automatically distribute model across available devices (GPU/CPU)
     )
     print("Model loaded.")
-
+
+    # Download and load the prompts JSON file
+    print(f"Downloading {PROMPT_FILENAME}...")
+    prompts_file_path = hf_hub_download(
+        repo_id=MODEL_NAME,
+        filename=PROMPT_FILENAME,
+        cache_dir=MODEL_CACHE,
+        # force_download=True, # Uncomment to force redownload if needed
+    )
+    print(f"{PROMPT_FILENAME} downloaded to: {prompts_file_path}")
+
+    # Load the JSON data
+    with open(prompts_file_path, 'r') as f:
+        tdc_prompts_data = json.load(f)
+    print(f"Loaded {len(tdc_prompts_data)} prompts from {PROMPT_FILENAME}.")
+
+    # --- Prepare examples for Gradio ---
+    # ASSUMPTION: tdc_prompts.json is a list of objects, each with at least a 'prompt' key.
+    # We'll use default values for max_tokens and temperature for the examples.
+    # Modify this logic if the JSON structure is different.
+    if isinstance(tdc_prompts_data, list):
+        # Limit the number of examples shown in the UI
+        examples_list = [
+            [item.get("prompt", "Missing prompt"), 100, 0.7] # Default max_tokens=100, temp=0.7
+            for item in tdc_prompts_data[:MAX_EXAMPLES]
+            if isinstance(item, dict) and "prompt" in item # Ensure item is dict and has 'prompt'
+        ]
+    else:
+        print(f"Warning: {PROMPT_FILENAME} does not contain a list. Cannot load examples.")
+        examples_list = [] # Fallback to empty examples
+
 
 except Exception as e:
-    print(f"Error loading model or tokenizer: {e}")
-
-    raise gr.Error(f"Failed to load the model {MODEL_NAME}. Check logs for details. Error: {e}")
+    print(f"Error loading model, tokenizer, or prompts: {e}")
+    raise gr.Error(f"Failed during setup. Check logs for details. Error: {e}")
 
 
 # --- Prediction Function ---
@@ -67,34 +103,37 @@ def predict(prompt, max_new_tokens=100, temperature=0.7):
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device) # Move inputs to the model's device
 
         # Generate text
-        # Use torch.no_grad() for inference to save memory and speed up
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_new_tokens=int(max_new_tokens), # Ensure it's an integer
                 temperature=float(temperature), # Ensure it's a float
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id # Set pad token id
+                do_sample=True if float(temperature) > 0 else False, # Only sample if temp > 0
+                pad_token_id=tokenizer.eos_token_id # Set pad token id
             )
 
         # Decode the generated tokens
         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        print(f"Generated text: {generated_text}")
+        print(f"Generated text (raw): {generated_text}")
 
-        #
-        # Note: This basic removal might not be perfect for all cases.
+        # Remove the prompt from the beginning of the generated text
         if generated_text.startswith(prompt):
-            # Add a small buffer in case of slight variations
             prompt_length = len(prompt)
-            result_text = generated_text[prompt_length:].lstrip()
+            result_text = generated_text[prompt_length:].lstrip()
         else:
-            result_text = generated_text
-
+            # Handle cases where the model might slightly alter the prompt start
+            # This is a basic check; more robust checks might be needed
+            common_prefix = os.path.commonprefix([prompt, generated_text])
+            if len(common_prefix) > len(prompt) * 0.8: # If >80% of prompt matches start
+                result_text = generated_text[len(common_prefix):].lstrip()
+            else:
+                result_text = generated_text # Assume prompt is not included
+
+        print(f"Generated text (processed): {result_text}")
         return result_text
 
     except Exception as e:
         print(f"Error during prediction: {e}")
-        # Return a user-friendly error message
        return f"An error occurred during generation: {e}"
 
 # --- Gradio Interface ---
@@ -104,8 +143,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         f"""
         # 🤖 TXGemma-2B-Predict Text Generation
 
-        Enter a prompt below and the model ({MODEL_NAME}) will generate text based on it.
-        Adjust the parameters for different results.
+        Enter a prompt below or select an example, and the model ({MODEL_NAME}) will generate text based on it.
+        Adjust the parameters for different results. Examples loaded from `{PROMPT_FILENAME}`.
         """
     )
     with gr.Row():
@@ -118,19 +157,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             with gr.Row():
                 max_tokens_slider = gr.Slider(
                     minimum=10,
-                    maximum=500,
+                    maximum=500, # Adjust max limit if needed
                     value=100,
                     step=10,
                     label="Max New Tokens",
                     info="Maximum number of tokens to generate after the prompt."
                 )
                 temperature_slider = gr.Slider(
-                    minimum=0.
+                    minimum=0.0, # Allow deterministic generation
                    maximum=1.5,
                     value=0.7,
-                    step=0.
+                    step=0.05, # Finer control for temperature
                     label="Temperature",
-                    info="Controls randomness
+                    info="Controls randomness (0=deterministic, >0=random)."
                 )
             submit_button = gr.Button("Generate Text", variant="primary")
         with gr.Column(scale=3):
@@ -148,16 +187,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         api_name="predict" # Name for API endpoint if needed
     )
 
-
-
-
-
-
-
-
+    # Use the loaded examples if available
+    if examples_list:
+        gr.Examples(
+            examples=examples_list,
+            inputs=[prompt_input, max_tokens_slider, temperature_slider], # Match inputs to the predict function
+            outputs=output_text,
+            fn=predict, # The function to run when an example is clicked
+            cache_examples=False # Caching might be slow/problematic for LLMs
+        )
+    else:
+        gr.Markdown("_(Could not load examples from JSON file.)_")
+
 
 # --- Launch the App ---
 print("Launching Gradio app...")
-# share=True creates a public link (useful for testing but remove/set to False for permanent spaces if not needed)
 # queue() enables handling multiple users concurrently
 demo.queue().launch(debug=True) # Set debug=False for production
+
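
Since the click handler is registered with api_name="predict", the generation endpoint can also be called outside the UI once the Space is running. A minimal client-side sketch using gradio_client; the Space id below is a placeholder, and the three positional arguments map to the prompt textbox, the max-tokens slider, and the temperature slider:

    # Minimal sketch, assuming a public Space; replace the placeholder id with the real one.
    from gradio_client import Client

    client = Client("your-username/your-space")  # placeholder Space id
    result = client.predict(
        "Example prompt text",  # prompt_input
        100,                    # max_tokens_slider
        0.7,                    # temperature_slider
        api_name="/predict",
    )
    print(result)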