# NOTE(review): the lines below are web-page scrape residue (file listing,
# commit hashes, line-number gutter) accidentally captured with the source.
# Commented out so the module is valid Python — safe to delete entirely.
# Spaces:
# Running
# Running
# File size: 1,840 Bytes
# 0d67dc2 a2f46f0 0d67dc2 5ad3bc3 4228071 0d67dc2 fdb3b96 8bd4741 0d67dc2 716d802 a2f46f0 716d802 a2f46f0 716d802 0d67dc2 716d802 a2f46f0 0d67dc2 a2f46f0 0d67dc2 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import fastapi
import json
import markdown
import uvicorn
from fastapi.responses import StreamingResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from ctransformers import AutoModelForCausalLM
from pydantic import BaseModel
# Intended model context window.
# NOTE(review): this dict is never passed to from_pretrained below, so the
# model loads with its default context length — TODO confirm and wire it
# through (ctransformers accepts e.g. context_length=...).
config = {"max_seq_len": 4096}
# Load the 4-bit quantized MPT-7B StoryWriter GGML model from the Hugging
# Face Hub. This runs at import time: it blocks (and may download several GB)
# before the app can serve any request.
llm = AutoModelForCausalLM.from_pretrained('TheBloke/MPT-7B-Storywriter-GGML',
model_file='mpt-7b-storywriter.ggmlv3.q4_0.bin',
model_type='mpt')
# Application instance with a fully permissive CORS policy so that browser
# clients served from any origin can call the streaming endpoints.
app = fastapi.FastAPI()

cors_policy = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_policy)
@app.get("/")
async def index():
    """Serve the project's README.md, rendered from Markdown to HTML."""
    with open("README.md", "r", encoding="utf-8") as readme:
        rendered = markdown.markdown(readme.read())
    return HTMLResponse(content=rendered, status_code=200)
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions."""

    # Free-form text the model should continue.
    prompt: str
@app.get("/stream")
async def stream_story(prompt: str = "Once upon a time there was a "):
    """Stream a story continuation for *prompt*, token by token.

    Renamed from ``chat`` because the POST handler below also used that
    name, shadowing this function at module level.

    Args:
        prompt: text for the model to continue (query parameter).

    Returns:
        StreamingResponse yielding generated tokens followed by a
        ``"[DONE]"`` sentinel.
    """
    # stream=True makes ctransformers yield tokens as they are generated;
    # without it, llm(prompt) blocks until the entire completion string is
    # finished and the "stream" then just iterates that string char by char.
    completion = llm(prompt, stream=True)

    async def server_sent_events(chat_chunks):
        for chat_chunk in chat_chunks:
            yield chat_chunk
        # Sentinel so clients know generation is complete.
        # NOTE(review): despite the name, this is not real SSE framing
        # (no "data: ...\n\n" envelope) — left as-is for client compatibility.
        yield "[DONE]"

    return StreamingResponse(server_sent_events(completion))
@app.post("/v1/chat/completions")
async def chat(request: ChatCompletionRequest, response_mode=None):
    """Stream a completion for the prompt carried in *request*.

    Args:
        request: body whose ``prompt`` field is fed to the model.
        response_mode: accepted for API compatibility; currently unused.

    Returns:
        StreamingResponse yielding generated tokens followed by a
        ``"[DONE]"`` sentinel.
    """
    # stream=True makes ctransformers yield tokens as they are generated;
    # the original llm(request.prompt) blocked until generation finished and
    # then iterated the completed string one character at a time.
    completion = llm(request.prompt, stream=True)

    async def server_sent_events(chat_chunks):
        for chat_chunk in chat_chunks:
            # Debug print removed: it wrote every token to stdout per request.
            yield chat_chunk
        # Sentinel so clients know generation is complete.
        yield "[DONE]"

    return StreamingResponse(server_sent_events(completion))
if __name__ == "__main__":
    # Dev entry point: serve the API on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
# |  (scrape residue — safe to delete)