Spaces:

osmankoc
/

zoa-llm-api

Sleeping

osmankoc committed on Feb 3

Commit

d844f20

1 Parent(s): 115777a

Initial commit: FastAPI Hugging Face deployment

Files changed (4) hide show

.gitignore ADDED Viewed

+# Python compiled files
+__pycache__/
+*.py[cod]
+*.so
+# Virtual environments
+venv/
+env/
+*.env
+*.venv
+# System files
+.DS_Store
+Thumbs.db
+# Hugging Face cache (prevents unnecessary model downloads)
+# NOTE(review): git does not expand `~` in ignore patterns, so this entry
+# likely matches only a directory literally named `~` inside the repository.
+# Confirm it has the intended effect.
+~/.cache/huggingface/
+# Model weights (in case you added large models manually)
+*.bin
+*.h5

Dockerfile ADDED Viewed

+# Use the official Python image
+FROM python:3.9
+# Create the working directory
+WORKDIR /app
+# Copy the required files
+COPY requirements.txt requirements.txt
+# Install the dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the main application file
+COPY app.py app.py
+# Run FastAPI: serve app:app with uvicorn on all interfaces, port 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

+from fastapi import FastAPI
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+app = FastAPI()
+MODEL_NAME = "osmankoc/llama-2-7b-zoa"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Loading model: {MODEL_NAME} on {device}...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
+@app.get("/")
+def home():
+    return {"message": "ZOA AI Model API is running!"}
+@app.post("/generate/")
+def generate_text(prompt: str):
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    output = model.generate(**inputs, max_length=500)
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return {"response": response}

requirements.txt ADDED Viewed

+# Web framework imported by app.py
+fastapi
+# ASGI server launched by the Dockerfile CMD
+uvicorn
+# Imported by app.py for device selection
+torch
+# Provides AutoModelForCausalLM and AutoTokenizer used by app.py
+transformers
+# Not imported directly by app.py — TODO confirm it is still required
+accelerate