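# Generator script for the keo-ai-studio package.
#
# Each variable below holds the full text of one package file; the files are
# listed and then bundled into a zip archive at the end of the script.
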
# setup.py
# NOTE(review): the author string below is mojibake (it looks like UTF-8
# Arabic text that was decoded with the wrong code page). It is kept exactly
# as found, only rejoined onto one line so the literal is valid Python;
# restore the real name from the original source.
setup_py = r"""
from setuptools import setup, find_packages

setup(
    name="keo-ai-studio",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "transformers>=4.30.0",
        "torch>=1.12.0",
    ],
    # `datasets` is imported only by the fine-tune helper, so it is optional.
    extras_require={
        "train": ["datasets"],
    },
    entry_points={
        'console_scripts': [
            'keo-chat=keo_ai_studio.cli:main',
        ],
    },
    author="ุงูุนุจูุฑู ูุฑููุญุณูู",
    description="keo ai studio - thin python wrapper for local LLMs with optional fine-tune helpers",
    url="",
)
"""


# keo_ai_studio/__init__.py
# The outer literal uses ''' so the generated module can carry a """ docstring
# without terminating the string early.
init_py = r'''
"""keo ai studio: thin wrapper around Hugging Face compatible causal LMs."""

from .model import KeoAI
from .trainer import finetune

__all__ = ["KeoAI", "finetune"]
'''


# keo_ai_studio/model.py
# The outer literal uses ''' because the generated module contains """
# docstrings, which would otherwise end the string early.
# NOTE(review): the non-ASCII literals below (usage example, author reply,
# trigger phrases) are mojibake - they look like UTF-8 Arabic text decoded
# with the wrong code page. They are kept as found, only rejoined onto single
# lines so each literal is valid; restore the real text from the original
# source, otherwise the Arabic trigger phrases can never match.
model_py = r'''
import os
from typing import Optional

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
except Exception:
    # Keep this module importable when transformers is missing or broken;
    # KeoAI.__init__ checks for None and raises a clear error instead.
    AutoTokenizer = None
    AutoModelForCausalLM = None
    pipeline = None


class KeoAI:
    """Thin wrapper that loads a Hugging Face compatible model or local folder.

    Usage:
        k = KeoAI(model_name_or_path="path_or_hf_id")
        k.chat("ุงูุณุคุงู ููุงุ")

    If transformers could not be imported, constructing the object raises
    RuntimeError.
    """

    def __init__(self, model_name_or_path: Optional[str] = None, alias: str = "keo ai studio"):
        """Load the tokenizer and model and build a text-generation pipeline.

        Args:
            model_name_or_path: Model id or local folder; falls back to the
                current working directory when omitted or empty.
            alias: Display name stored on the instance.

        Raises:
            RuntimeError: If transformers could not be imported.
        """
        self.alias = alias
        self.model_name_or_path = model_name_or_path or os.getcwd()
        if AutoTokenizer is None:
            raise RuntimeError("transformers not installed. Run: pip install transformers torch")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path)
        self._pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def chat(self, prompt: str, max_new_tokens: int = 128, do_sample: bool = True):
        """Generate a reply for given prompt and return the generated text."""
        full = self._pipe(prompt, max_new_tokens=max_new_tokens, do_sample=do_sample)
        return full[0]["generated_text"]

    def reply_author(self):
        """Return the fixed author string used for "who made you" questions."""
        return "ุงูุนุจูุฑู ูุฑููุญุณูู"

    def smart_answer(self, question: str):
        """Answer authorship questions with a canned reply, else ask the model."""
        q_low = question.strip().lower()
        triggers = ["ููู ุนููู", "ูู ุตูุนู", "ูู ุงูุดุฃู", "who made you", "who created you"]
        if any(x in q_low for x in triggers):
            return self.reply_author()
        return self.chat(question)
'''


# keo_ai_studio/trainer.py
# The outer literal uses ''' because the generated module contains a """
# docstring.
trainer_py = r'''
def finetune(model_path_or_id, dataset_path, output_dir, epochs=1, batch_size=2, lr=2e-5):
    """Fine-tune a causal language model on a plain-text file.

    Args:
        model_path_or_id: Model id or local folder to start from.
        dataset_path: Text file(s) loaded with the `text` dataset builder.
        output_dir: Folder that receives checkpoints, the final model and
            the tokenizer.
        epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        lr: Learning rate passed to the trainer.

    Raises:
        ImportError: If transformers or datasets is not installed.
    """
    # Imported inside the function so that importing the package does not
    # require the training dependencies.
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        DataCollatorForLanguageModeling,
        Trainer,
        TrainingArguments,
    )
    try:
        from datasets import load_dataset
    except ImportError as err:
        raise ImportError("finetune requires the datasets package. Run: pip install datasets") from err

    tokenizer = AutoTokenizer.from_pretrained(model_path_or_id)
    model = AutoModelForCausalLM.from_pretrained(model_path_or_id)
    # Some tokenizers (GPT-2 for example) define no pad token, but the
    # collator has to pad batches; reuse the end-of-sequence token then.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    ds = load_dataset('text', data_files={'train': dataset_path})

    def tokf(ex):
        return tokenizer(ex['text'], truncation=True, max_length=1024)

    tokenized = ds.map(tokf, batched=True, remove_columns=['text'])

    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=lr,  # previously accepted but never forwarded
        save_total_limit=2,
        logging_steps=200,
        fp16=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized['train'],
        data_collator=data_collator,
    )
    trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
'''


# keo_ai_studio/cli.py
# The outer literal is raw so the '\n' escapes reach the generated file
# unchanged, and uses ''' because the generated module has a """ docstring.
# NOTE(review): the third exit keyword is mojibake (it looks like UTF-8 Arabic
# text decoded with the wrong code page) and is kept as found; restore the
# real word from the original source, otherwise it can never match.
cli_py = r'''
import argparse

from .model import KeoAI


def main():
    """Run an interactive chat loop until the user exits."""
    parser = argparse.ArgumentParser(prog='keo-chat')
    parser.add_argument('--model', '-m', default=None, help='model id or local path')
    args = parser.parse_args()
    k = KeoAI(args.model)
    print('keo ai studio interactive. type exit to quit')
    while True:
        try:
            q = input('> ')
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the loop without a traceback.
            break
        # Skip empty and whitespace-only lines instead of sending them on.
        if not q.strip():
            continue
        if q.strip().lower() in ('exit', 'quit', 'ุฎุฑูุฌ'):
            break
        print('\n' + k.smart_answer(q) + '\n')


if __name__ == '__main__':
    main()
'''


# README.md
# NOTE(review): the non-ASCII example prompts are mojibake (they look like
# UTF-8 Arabic text decoded with the wrong code page). They are kept as found,
# only rejoined onto single lines so the example is valid Python; restore the
# real text from the original source.
readme = r"""
keo-ai-studio
=============

Lightweight Python package that wraps a Hugging Face compatible causal LM.

Installation (from local folder):

```bash
pip install .
```

Usage:

```python
from keo_ai_studio import KeoAI
k = KeoAI(model_name_or_path='path_or_hf_id')
print(k.smart_answer('ูู ุนูููุ'))
print(k.smart_answer('ุงุดุฑุญ ุจุงูุซูู'))
```

Fine-tune helper (additionally needs the `datasets` package):

```python
from keo_ai_studio import finetune
finetune('gpt2', 'data/my_corpus.txt', './keo_finetuned', epochs=1)
```
"""


# Combined package writer - instruct user to create files
# Maps each archive-relative path to the text that file should contain.
package_files = {
    'setup.py': setup_py,
    'keo_ai_studio/__init__.py': init_py,
    'keo_ai_studio/model.py': model_py,
    'keo_ai_studio/trainer.py': trainer_py,
    'keo_ai_studio/cli.py': cli_py,
    'README.md': readme,
}

# List the target paths so the user can see what will be created.
print('Files to create in your project:')
for p in package_files:
    print('-', p)


# For convenience, write them to a zip for the user to download locally.
import os
import zipfile

# /mnt/data is the original target and is still used whenever it exists;
# otherwise fall back to the current working directory so the script does not
# fail on machines without that folder.
out_dir = '/mnt/data' if os.path.isdir('/mnt/data') else os.getcwd()
zipname = os.path.join(out_dir, 'keo_ai_studio_package.zip')

# writestr stores each str entry under its relative path inside the archive.
with zipfile.ZipFile(zipname, 'w') as z:
    for p, content in package_files.items():
        z.writestr(p, content)

print('Created package zip at:', zipname)
|