kimo33442 committed on
Commit
696c38d
·
verified ·
1 Parent(s): 06ec20a

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. keo_ai_studio_package.py +187 -0
keo_ai_studio_package.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# keo_ai_studio package (single-file view)
# Save this structure locally as shown in README below.

# setup.py
# Raw-string template holding the source of the package's setup.py.
setup_py = r"""
from setuptools import setup, find_packages

setup(
    name="keo-ai-studio",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "transformers>=4.30.0",
        "torch>=1.12.0",
    ],
    # Only keo_ai_studio.trainer needs these, so they stay optional:
    #   pip install ".[train]"
    extras_require={
        "train": ["datasets", "accelerate"],
    },
    entry_points={
        'console_scripts': [
            'keo-chat=keo_ai_studio.cli:main'
        ]
    },
    author="العبقري كريم حسين",
    description="keo ai studio - thin python wrapper for local LLMs with optional fine-tune helpers",
    url="",
)
"""

# keo_ai_studio/__init__.py
# Raw-string template for the package initializer, which re-exports the
# public API (KeoAI and finetune). The whole module source must sit inside
# one literal: an early closing delimiter would make the relative imports
# run at the top level of this script instead of being stored as text.
init_py = r"""
from .model import KeoAI
from .trainer import finetune

__all__ = ["KeoAI", "finetune"]
"""

# keo_ai_studio/model.py
# The template is delimited with ''' because the module source it holds uses
# """docstrings""", which would otherwise terminate the literal early.
model_py = r'''
import os
from typing import Optional

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
except Exception:
    # Deliberately broad: keeps the package importable even if transformers is
    # missing or fails while importing. KeoAI() raises a clear error instead.
    AutoTokenizer = None
    AutoModelForCausalLM = None
    pipeline = None


class KeoAI:
    """Thin wrapper that loads a Hugging Face compatible model or local folder.

    Usage:
        k = KeoAI(model_name_or_path="path_or_hf_id")
        k.chat("السؤال هنا؟")

    If transformers is not installed, constructing the object raises
    RuntimeError.
    """

    # Lower-cased phrases that ask who built the assistant.
    _AUTHOR_TRIGGERS = (
        "مين عملك",
        "من عملك",
        "من صنعك",
        "من انشأك",
        "who made you",
        "who created you",
    )

    def __init__(self, model_name_or_path: Optional[str] = None, alias: str = "keo ai studio"):
        """Load the tokenizer, the model and a text-generation pipeline.

        Args:
            model_name_or_path: Hugging Face model id or local folder; falls
                back to the current working directory when omitted.
            alias: Display name stored on the instance.

        Raises:
            RuntimeError: If transformers could not be imported.
        """
        self.alias = alias
        self.model_name_or_path = model_name_or_path or os.getcwd()
        if AutoTokenizer is None:
            raise RuntimeError("transformers not installed. Run: pip install transformers torch")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path)
        # convenience pipeline
        self._pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def chat(self, prompt: str, max_new_tokens: int = 128, do_sample: bool = True):
        """Generate a reply for the given prompt and return the generated text."""
        full = self._pipe(prompt, max_new_tokens=max_new_tokens, do_sample=do_sample)
        return full[0]["generated_text"]

    def reply_author(self):
        """Return the fixed author line."""
        return "العبقري كريم حسين"

    def smart_answer(self, question: str):
        """Answer "who made you" style questions directly, else ask the model."""
        q_low = question.strip().lower()
        if any(trigger in q_low for trigger in self._AUTHOR_TRIGGERS):
            return self.reply_author()
        return self.chat(question)
'''

# keo_ai_studio/trainer.py
# Raw-string template for the fine-tuning helper module. Delimited with '''
# so the module source can carry a regular """docstring""".
trainer_py = r'''
# Very small helper functions to fine-tune a causal LM using Hugging Face Trainer.
# This file expects transformers, datasets, accelerate installed and a prepared dataset.

def finetune(model_path_or_id, dataset_path, output_dir, epochs=1, batch_size=2, lr=2e-5):
    """Fine-tune a causal LM on a plain-text file and save the result.

    Args:
        model_path_or_id: Hugging Face model id or local folder to start from.
        dataset_path: Text file(s) handed to the datasets 'text' loader.
        output_dir: Folder that receives checkpoints, the final model and
            the tokenizer.
        epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        lr: Learning rate passed to the trainer.
    """
    # Imported lazily so the package itself imports without the training stack.
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        DataCollatorForLanguageModeling,
        Trainer,
        TrainingArguments,
    )
    from datasets import load_dataset

    tokenizer = AutoTokenizer.from_pretrained(model_path_or_id)
    model = AutoModelForCausalLM.from_pretrained(model_path_or_id)
    # Some tokenizers (e.g. GPT-2) define no pad token, and the collator has to
    # pad variable-length sequences into a batch; reuse EOS in that case.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    ds = load_dataset('text', data_files={'train': dataset_path})

    def tokf(ex):
        return tokenizer(ex['text'], truncation=True, max_length=1024)

    tokenized = ds.map(tokf, batched=True, remove_columns=['text'])

    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=lr,
        save_total_limit=2,
        logging_steps=200,
        fp16=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized['train'],
        data_collator=data_collator,
    )
    trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
'''

# keo_ai_studio/cli.py
# Raw-string template for the `keo-chat` console entry point. The literal is
# raw so that '\n' reaches the generated file as an escape sequence, and it is
# delimited with ''' so the module source can carry a """docstring""".
cli_py = r'''
import argparse
from .model import KeoAI


def main():
    """Run an interactive prompt loop until exit/quit, EOF or Ctrl+C."""
    parser = argparse.ArgumentParser(prog='keo-chat')
    parser.add_argument('--model', '-m', default=None, help='model id or local path')
    args = parser.parse_args()
    k = KeoAI(args.model)
    print('keo ai studio interactive. type exit to quit')
    while True:
        try:
            q = input('> ').strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl+D / Ctrl+C end the session quietly instead of a traceback.
            break
        if not q:
            # Blank or whitespace-only input is not worth a model call.
            continue
        if q.lower() in ('exit', 'quit', 'خروج'):
            break
        print('\n' + k.smart_answer(q) + '\n')


if __name__ == '__main__':
    main()
'''

# README.md
# Raw-string template holding the Markdown README shipped with the package.
readme = r"""
keo-ai-studio
=============

Lightweight Python package that wraps a Hugging Face compatible causal LM.

Installation (from local folder):

```bash
pip install .
```

Usage:

```python
from keo_ai_studio import KeoAI
k = KeoAI(model_name_or_path='path_or_hf_id')
print(k.smart_answer('من صنعك؟'))  # returns the author line
print(k.smart_answer('اشرح بايثون'))
```

Fine-tune helper (additionally requires `pip install datasets accelerate`):

```python
from keo_ai_studio import finetune
finetune('gpt2', 'data/my_corpus.txt', './keo_finetuned', epochs=1)
```
"""

# Combined package writer - maps each path inside the package to the template
# string that holds its contents.
package_files = {
    'setup.py': setup_py,
    'keo_ai_studio/__init__.py': init_py,
    'keo_ai_studio/model.py': model_py,
    'keo_ai_studio/trainer.py': trainer_py,
    'keo_ai_studio/cli.py': cli_py,
    'README.md': readme,
}

print('Files to create in your project:')
for p in package_files:
    print('-', p)

# For convenience, write them to a zip for the user to download locally.
# /mnt/data is kept as the destination when it exists (presumably a hosted
# sandbox's download folder - confirm); anywhere else the archive goes to the
# current working directory instead of failing on a missing path.
import os
import zipfile

out_dir = '/mnt/data' if os.path.isdir('/mnt/data') else os.getcwd()
zipname = os.path.join(out_dir, 'keo_ai_studio_package.zip')
with zipfile.ZipFile(zipname, 'w') as z:
    for p, content in package_files.items():
        z.writestr(p, content)
print('Created package zip at:', zipname)