AlfredPros
/

CodeLlama-7b-Instruct-Solidity

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

AlfredPros committed on Nov 5, 2023

Commit

a05273e

·

1 Parent(s): d346fb2

Update README.md

Files changed (1) hide show

README.md +2 -2

README.md CHANGED Viewed

@@ -119,7 +119,7 @@ bnb_4bit_quant_type = "nf4"
 use_double_nested_quant = True
 compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
-# BitsAndBytesConfig int-4 config
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=use_4bit,
     bnb_4bit_use_double_quant=use_double_nested_quant,
@@ -149,6 +149,6 @@ input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cu
 # Run the model to infer an output
 outputs = model.generate(input_ids=input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.001, pad_token_id=1)
-# Display the generated output
 print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):])
 ```

 use_double_nested_quant = True
 compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
+# BitsAndBytesConfig 4-bit config
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=use_4bit,
     bnb_4bit_use_double_quant=use_double_nested_quant,
 # Run the model to infer an output
 outputs = model.generate(input_ids=input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.001, pad_token_id=1)
+# Detokenize and display the generated output
 print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):])
 ```