Tonic committed
Commit ce33972 · verified · 1 Parent(s): b75aa1a

Update app.py

Files changed (1): app.py (+4 -3)

app.py CHANGED
@@ -12,8 +12,8 @@ description = """✒️InkubaLM has been trained from scratch using 1.9 billion
 hf_token = os.getenv("HF_TOKEN")
 # Load the model and tokenizer
 model_name = "lelapa/InkubaLM-0.4B"
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, use_auth_token=hf_token)
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, token=hf_token)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=hf_token)
 
 # Move model to GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -32,7 +32,8 @@ def generate_text(prompt, max_length, repetition_penalty, temperature):
         max_length=max_length,
         repetition_penalty=repetition_penalty,
         temperature=temperature,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=True
     )
 
     # Decode the generated tokens and return the result
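For context, this commit does two things: it switches the authentication keyword from the deprecated use_auth_token to token (its replacement in recent transformers releases), and it enables sampling so that the temperature slider actually affects generation (temperature only applies when do_sample=True; the default is greedy decoding). Below is a minimal sketch of the loading and generation path after this commit; the lines not visible in the diff (imports, prompt tokenization, decoding, the return value) are assumptions for illustration, not the actual contents of app.py.

# Sketch of the pattern after this commit; surrounding code is assumed, not from the diff.
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.getenv("HF_TOKEN")

# Load the model and tokenizer; `token=` replaces the deprecated `use_auth_token=`.
model_name = "lelapa/InkubaLM-0.4B"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, token=hf_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=hf_token)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_text(prompt, max_length, repetition_penalty, temperature):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        repetition_penalty=repetition_penalty,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,  # avoids the "no pad token set" warning
        do_sample=True,                       # temperature has no effect without sampling
    )
    # Decode the generated tokens and return the result
    return tokenizer.decode(outputs[0], skip_special_tokens=True)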