Update README.md
Browse files
README.md
CHANGED
|
@@ -34,26 +34,26 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 34 |
from peft import PeftModel
|
| 35 |
from huggingface_hub import login
|
| 36 |
|
| 37 |
-
|
| 38 |
login("Huggingface access token")
|
| 39 |
|
| 40 |
-
|
| 41 |
peft_model_name="bpavlsh/Mistral-Fake-News-Detection"
|
| 42 |
|
| 43 |
-
|
| 44 |
base_model = AutoModelForCausalLM.from_pretrained( model_id, load_in_4bit=True,
|
| 45 |
device_map="auto", torch_dtype="auto")
|
| 46 |
model = PeftModel.from_pretrained(base_model, peft_model_name)
|
| 47 |
|
| 48 |
-
|
| 49 |
|
| 50 |
-
|
| 51 |
You are an expert in analyzing news for fake content, propaganda, and offensive language.
|
| 52 |
<</SYS>>
|
| 53 |
|
| 54 |
-
|
| 55 |
|
| 56 |
-
|
| 57 |
output = model.generate(**inputs, max_new_tokens=1500)
|
| 58 |
output_result=tokenizer.decode(output[0], skip_special_tokens=True)
|
| 59 |
result=output_result.split('[/INST]')[1]
|
|
|
|
from peft import PeftModel
from huggingface_hub import login

# Log in to the Hugging Face Hub so the gated Mistral base model can be
# downloaded (replace the placeholder with a real access token).
login("Huggingface access token")

# Base model and the LoRA adapter fine-tuned for fake-news detection.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
peft_model_name="bpavlsh/Mistral-Fake-News-Detection"

# Load the tokenizer and the 4-bit quantized base model, then attach the
# PEFT (LoRA) adapter weights on top of it.
tokenizer = AutoTokenizer.from_pretrained(model_id)
base_model = AutoModelForCausalLM.from_pretrained( model_id, load_in_4bit=True,
             device_map="auto", torch_dtype="auto")
model = PeftModel.from_pretrained(base_model, peft_model_name)

# News article to analyze — presumably 1 KB to 10 KB of plain text.
text=""" News text for analysis, from 1Kb to 10Kb """

# Mistral-instruct chat format: a <<SYS>> system block followed by the
# user instruction, all wrapped in [INST] ... [/INST].
prompt = f"""<s>[INST] <<SYS>>
You are an expert in analyzing news for fake content, propaganda, and offensive language.
<</SYS>>

Please analyze the following text: {text} [/INST]"""

# Tokenize, move to GPU, and generate the analysis.
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
output = model.generate(**inputs, max_new_tokens=1500)
output_result=tokenizer.decode(output[0], skip_special_tokens=True)
# Keep only the model's answer: the text after the [/INST] tag.
result=output_result.split('[/INST]')[1]