# File size: 5,878 bytes (revision 696c38d)
# keo_ai_studio package (single-file view)
# Save this structure locally as shown in README below.
# setup.py
# Source text of the generated ``setup.py``.
# The author name is kept as UTF-8 Arabic text on a single line: a line break
# inside the quoted value would make the generated file a SyntaxError.
setup_py = r"""
from setuptools import setup, find_packages

setup(
    name="keo-ai-studio",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "transformers>=4.30.0",
        "torch>=1.12.0",
    ],
    entry_points={
        "console_scripts": [
            "keo-chat=keo_ai_studio.cli:main",
        ],
    },
    author="العبقري كريم حسين",
    description="keo ai studio - thin python wrapper for local LLMs with optional fine-tune helpers",
    url="",
)
"""
# keo_ai_studio/__init__.py
# Source text of the generated package ``__init__.py``.
# The outer literal is delimited with ''' so that the embedded module
# docstring (written with """) cannot terminate it early.
init_py = r'''
"""keo ai studio - thin python wrapper for local LLMs."""
from .model import KeoAI
from .trainer import finetune

__all__ = ["KeoAI", "finetune"]
'''
# keo_ai_studio/model.py
# Source text of the generated ``model.py``.
# The outer literal is delimited with ''' because the embedded code contains
# """ docstrings, which would otherwise end the string and break this script.
model_py = r'''
import os
from typing import Optional

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    _IMPORT_ERROR = None
except Exception as exc:
    # Deliberately broad: the package must stay importable even when
    # transformers is missing or fails while importing. The failure is
    # remembered and reported when a KeoAI instance is created.
    AutoTokenizer = None
    AutoModelForCausalLM = None
    pipeline = None
    _IMPORT_ERROR = exc


class KeoAI:
    """Thin wrapper that loads a Hugging Face compatible model or local folder.

    Usage:
        k = KeoAI(model_name_or_path="path_or_hf_id")
        k.chat("السؤال هنا؟")

    If transformers could not be imported, creating the object raises
    RuntimeError.
    """

    # Fixed line returned by reply_author().
    AUTHOR = "العبقري كريم حسين"

    # Lower-case substrings that make smart_answer() return the author line.
    # "من عملك" is the phrasing used in the README usage example.
    AUTHOR_TRIGGERS = (
        "مين عملك",
        "من عملك",
        "من صنعك",
        "من انشأك",
        "who made you",
        "who created you",
    )

    def __init__(self, model_name_or_path: Optional[str] = None, alias: str = "keo ai studio"):
        """Load tokenizer, model and a text-generation pipeline.

        Args:
            model_name_or_path: model id or local folder; when empty or None
                the current working directory is used.
            alias: display name stored on the instance.

        Raises:
            RuntimeError: if transformers could not be imported.
        """
        self.alias = alias
        self.model_name_or_path = model_name_or_path or os.getcwd()
        if AutoTokenizer is None:
            raise RuntimeError(
                "transformers not installed. Run: pip install transformers torch"
            ) from _IMPORT_ERROR
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path)
        # convenience pipeline
        self._pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def chat(self, prompt: str, max_new_tokens: int = 128, do_sample: bool = True):
        """Generate a reply for the given prompt.

        Returns the "generated_text" field of the first pipeline result.
        """
        outputs = self._pipe(prompt, max_new_tokens=max_new_tokens, do_sample=do_sample)
        return outputs[0]["generated_text"]

    def reply_author(self):
        """Return the fixed author line."""
        return self.AUTHOR

    def smart_answer(self, question: str):
        """Answer "who made you" style questions directly, else call chat().

        The question is stripped and lower-cased only for trigger matching;
        chat() receives the question unchanged.
        """
        normalized = question.strip().lower()
        if any(trigger in normalized for trigger in self.AUTHOR_TRIGGERS):
            return self.reply_author()
        return self.chat(question)
'''
# keo_ai_studio/trainer.py
# Source text of the generated ``trainer.py``.
# Delimited with ''' so the embedded function docstring (""") is safe.
trainer_py = r'''
# Very small helper functions to fine-tune a causal LM using Hugging Face Trainer.
# This file expects transformers, datasets, accelerate installed and a prepared dataset.


def finetune(model_path_or_id, dataset_path, output_dir, epochs=1, batch_size=2, lr=2e-5,
             max_length=1024):
    """Fine-tune a causal language model on a text file and save the result.

    Args:
        model_path_or_id: model id or local path passed to ``from_pretrained``.
        dataset_path: path of the training file, loaded with the ``text``
            dataset loader as the ``train`` split.
        output_dir: directory that receives checkpoints, the final model and
            the tokenizer.
        epochs: number of training epochs.
        batch_size: per-device training batch size.
        lr: learning rate handed to ``TrainingArguments``.
        max_length: token limit used when truncating each example.
    """
    # Imported lazily so the package can be imported without these libraries.
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        DataCollatorForLanguageModeling,
        Trainer,
        TrainingArguments,
    )
    from datasets import load_dataset

    tokenizer = AutoTokenizer.from_pretrained(model_path_or_id)
    if tokenizer.pad_token is None:
        # Some tokenizers (GPT-2 style) define no padding token, but the data
        # collator pads every batch; reuse the end-of-sequence token.
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_path_or_id)

    ds = load_dataset("text", data_files={"train": dataset_path})

    def tokf(ex):
        return tokenizer(ex["text"], truncation=True, max_length=max_length)

    # Drop the raw text column so only tokenizer outputs reach the collator.
    tokenized = ds.map(tokf, batched=True, remove_columns=["text"])
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=lr,
        save_total_limit=2,
        logging_steps=200,
        fp16=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized["train"],
        data_collator=data_collator,
    )
    trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
'''
# keo_ai_studio/cli.py
# Source text of the generated ``cli.py`` (the ``keo-chat`` console script).
# Delimited with ''' so the embedded docstring (""") is safe; the literal is
# raw, so the "\n" escapes below reach the generated file unchanged.
cli_py = r'''
import argparse

from .model import KeoAI

# Inputs that end the interactive session ("خروج" is Arabic for "exit").
EXIT_WORDS = ("exit", "quit", "خروج")


def main():
    """Run the interactive prompt loop of the keo-chat command."""
    parser = argparse.ArgumentParser(prog="keo-chat")
    parser.add_argument("--model", "-m", default=None, help="model id or local path")
    args = parser.parse_args()
    k = KeoAI(args.model)
    print("keo ai studio interactive. type exit to quit")
    while True:
        try:
            q = input("> ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C end the session instead of showing a traceback.
            break
        command = q.strip()
        if not command:
            # Nothing typed (or only whitespace): prompt again.
            continue
        if command.lower() in EXIT_WORDS:
            break
        print("\n" + k.smart_answer(q) + "\n")


if __name__ == "__main__":
    main()
'''
# README.md
# Markdown text of the generated README. The Arabic sample questions are
# stored as UTF-8 text, each on a single line inside its quoted string.
readme = r"""
keo-ai-studio
=============

Lightweight Python package that wraps a Hugging Face compatible causal LM.

Installation (from local folder):

```bash
pip install .
```

Usage:

```python
from keo_ai_studio import KeoAI

k = KeoAI(model_name_or_path='path_or_hf_id')
print(k.smart_answer('من عملك؟'))  # returns the author line
print(k.smart_answer('اشرح بايثون'))
```

Fine-tune helper:

```python
from keo_ai_studio import finetune

finetune('gpt2', 'data/my_corpus.txt', './keo_finetuned', epochs=1)
```
"""
# Combined package writer: list the files to create, then bundle them into a
# single zip archive the user can download and unpack locally.
import os
import zipfile

# Maps each archive member path to the source text that belongs in it.
package_files = {
    'setup.py': setup_py,
    'keo_ai_studio/__init__.py': init_py,
    'keo_ai_studio/model.py': model_py,
    'keo_ai_studio/trainer.py': trainer_py,
    'keo_ai_studio/cli.py': cli_py,
    'README.md': readme,
}

print('Files to create in your project:')
for p in package_files:
    print('-', p)

# Keep writing to /mnt/data when that directory exists (the original target);
# otherwise fall back to the current working directory so the script does not
# fail with FileNotFoundError on machines without /mnt/data.
_output_dir = '/mnt/data' if os.path.isdir('/mnt/data') else os.getcwd()
zipname = os.path.join(_output_dir, 'keo_ai_studio_package.zip')
with zipfile.ZipFile(zipname, 'w') as z:
    for p, content in package_files.items():
        # writestr() encodes str data as UTF-8, so the Arabic text is kept.
        z.writestr(p, content)
print('Created package zip at:', zipname)
|