Askinkaty committed on
Commit 3d479ef · verified · 1 Parent(s): 1b5f1b1

Update README.md

Files changed (1)
  1. README.md +16 -27
README.md CHANGED
@@ -10,7 +10,6 @@ tags:
  - finance
  - relation_extraction
  - relation_types
- - classification
  ---

@@ -54,10 +53,23 @@ from transformers import AutoTokenizer, pipeline
  # Load Model with PEFT adapter

  finetune_name = 'Askinkaty/llama-finance-relations'
- tokenizer = AutoTokenizer.from_pretrained(finetune_name)
- model = AutoPeftModelForCausalLM.from_pretrained(
-     finetune_name, device_map="auto", torch_dtype=torch.float16
+
+ # Load the PEFT adapter together with its base weights
+ finetuned_model = AutoPeftModelForCausalLM.from_pretrained(
+     pretrained_model_name_or_path=finetune_name,
+     torch_dtype=torch.float16,
+     low_cpu_mem_usage=True,
  )
+
+ # The tokenizer comes from the base model the adapter was trained on
+ base_model = "meta-llama/Llama-3.2-1B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
+ finetuned_model.config.pad_token_id = finetuned_model.config.eos_token_id
+
+ # Build a generation pipeline, then swap in the fine-tuned model
+ pipe = pipeline('text-generation', model=base_model, tokenizer=tokenizer)
+ pipe.model = finetuned_model.to("cuda" if torch.cuda.is_available() else "cpu")
  ```
 
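For readers of this hunk, a minimal usage sketch of the pipeline assembled above follows. The input sentence, the `[E1]`/`[E2]` markup, and the generation settings are illustrative assumptions, not part of the commit; the system prompt is the one this README uses for its training data.

```python
# Minimal inference sketch; assumes the variables defined in the snippet above.
chat = [
    {
        "role": "system",
        "content": "You are an expert in financial documentation and market analysis. Define relations between two specified entities: entity 1 [E1] and entity 2 [E2] in a sentence. Return a short response in the required format. ",
    },
    # Hypothetical input sentence with assumed [E1]/[E2] markup.
    {"role": "user", "content": "[E1] Nokia [E1] reported a rise in quarterly [E2] revenue [E2]."},
]

# Render the conversation with the model's chat template, leaving the
# assistant turn open so the model generates the relation label.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
output = pipe(prompt, max_new_tokens=32, return_full_text=False)
print(output[0]["generated_text"])
```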
@@ -99,31 +111,8 @@ def batch_convert_to_messages(data):
      return messages
  ```

- The datasets were created using the code below.
-
- ```python
- from transformers import AutoTokenizer
- from datasets import Dataset
-
- tokenizer = AutoTokenizer.from_pretrained("meta/Llama-3.2-1B-Instruct")
-
- messages = [
-     [
-         {
-             "role": "system",
-             "content": "You are an expert in financial documentation and market analysis. Define relations between two specified entities: entity 1 [E1] and entity 2 [E2] in a sentence. Return a short response in the required format. "
-         },
-         {"role": "user", "content": f"{question}"},
-         {"role": "assistant", "content": f"{relation}"},
-     ], ...
- ]
-
- dataset = Dataset.from_dict({"messages": messages})
- dataset = dataset.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template(x["messages"], tokenize=False, add_generation_prompt=False)})
- ```

  #### Training Hyperparameters
 
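Only the signature and `return messages` of `batch_convert_to_messages` appear as context in the hunk above; its body is not part of this commit. Below is a hypothetical sketch consistent with the message shape in the removed block; the `(question, relation)` pairing of `data` is an assumption.

```python
# Hypothetical reconstruction for orientation only; not part of the commit.
SYSTEM_PROMPT = (
    "You are an expert in financial documentation and market analysis. "
    "Define relations between two specified entities: entity 1 [E1] and "
    "entity 2 [E2] in a sentence. Return a short response in the required format. "
)

def batch_convert_to_messages(data):
    # `data` is assumed to yield (question, relation) pairs.
    messages = []
    for question, relation in data:
        messages.append([
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{question}"},
            {"role": "assistant", "content": f"{relation}"},
        ])
    return messages
```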
 