Spaces:

hqms
/

baLLseM

Runtime error

App Files Files Community

hqms committed on May 13, 2024

Commit

8578816

1 Parent(s): 04f526c

initial commit

Browse files

Files changed (25) hide show

Dockerfile +51 -0
__init__.py +0 -0
compose.yaml +49 -0
main.py +16 -0
model/__init__.py +5 -0
model/emotion.py +18 -0
model/llm.py +50 -0
model/llm_agent.py +25 -0
model/model.py +68 -0
model/ner.py +13 -0
model/pos_tagging.py +13 -0
model/sentiment.py +17 -0
model/tools/__init__.py +5 -0
model/tools/predictor.py +24 -0
model/tools/sql_tools.py +20 -0
model/tools/wikipedia.py +11 -0
model/topic_modeling.py +0 -0
requirements.txt +120 -0
routers/__init__.py +15 -0
routers/emotion.py +13 -0
routers/ner.py +13 -0
routers/pos_tagging.py +13 -0
routers/prompt.py +14 -0
routers/sentiment.py +13 -0
routers/topic_modeling.py +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,51 @@

+# syntax=docker/dockerfile:1
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Dockerfile reference guide at
+# https://docs.docker.com/engine/reference/builder/
+ARG PYTHON_VERSION=3.11.9
+FROM python:${PYTHON_VERSION}-slim AS base
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
+ARG UID=10001
+RUN adduser \
+    --disabled-password \
+    --gecos "" \
+    --home "/nonexistent" \
+    --shell "/sbin/nologin" \
+    --no-create-home \
+    --uid "${UID}" \
+    appuser
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy it into
+# this layer.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    python -m pip install -r requirements.txt
+# appuser's home is declared as /nonexistent above; create it world-writable.
+# NOTE(review): presumably so libraries that cache under $HOME (e.g. model
+# downloads) can write there as the non-privileged user — confirm.
+RUN mkdir /nonexistent && chmod -cR 777 /nonexistent
+# Switch to the non-privileged user to run the application.
+USER appuser
+# Copy the source code into the container.
+COPY . .
+# Expose the port that the application listens on (must match --port below).
+EXPOSE 7860
+# Run the application. Exec form makes uvicorn PID 1 so it receives stop signals directly.
+CMD ["uvicorn", "main:app", "--host=0.0.0.0", "--port=7860"]

__init__.py ADDED Viewed

File without changes

compose.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Docker compose reference guide at
+# https://docs.docker.com/compose/compose-file/
+# Here the instructions define your application as a service called "server".
+# This service is built from the Dockerfile in the current directory.
+# You can add other services your application may depend on here, such as a
+# database or a cache. For examples, see the Awesome Compose repository:
+# https://github.com/docker/awesome-compose
+services:
+  server:
+    build:
+      context: .
+    ports:
+      # host:container — the image's CMD starts uvicorn on port 7860, so the
+      # container side must be 7860 for the service to be reachable on host 8001.
+      - 8001:7860
+# The commented out section below is an example of how to define a PostgreSQL
+# database that your application can use. `depends_on` tells Docker Compose to
+# start the database before your application. The `db-data` volume persists the
+# database data between container restarts. The `db-password` secret is used
+# to set the database password. You must create `db/password.txt` and add
+# a password of your choosing to it before running `docker compose up`.
+#     depends_on:
+#       db:
+#         condition: service_healthy
+#   db:
+#     image: postgres
+#     restart: always
+#     user: postgres
+#     secrets:
+#       - db-password
+#     volumes:
+#       - db-data:/var/lib/postgresql/data
+#     environment:
+#       - POSTGRES_DB=example
+#       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+#     expose:
+#       - 5432
+#     healthcheck:
+#       test: [ "CMD", "pg_isready" ]
+#       interval: 10s
+#       timeout: 5s
+#       retries: 5
+# volumes:
+#   db-data:
+# secrets:
+#   db-password:
+#     file: db/password.txt

main.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from routers import router
+app = FastAPI()
+app.include_router(router=router)
+# Set all CORS enabled origins
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+    expose_headers=["*"],
+)

model/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+label = {
+            "emotion": ["sedih", "marah", "takut", "cinta", "senang", "netral"],
+            "sentiment": ["positif", "netral", "negatif"]
+        }

model/emotion.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from model.model import Model
+class EmotionAnalysis(Model):
+    def __init__(self) -> None:
+        self.model_name = "thoriqfy/indobert-emotion-classification"
+        self.tasks = "emotion"
+        self.load_model(model_name=self.model_name, tasks=self.tasks)
+    def predict(self, sentences):
+        outputs = super().predict(sentences, self.tasks)
+        return {
+            "result": outputs["label"],
+            "score": outputs["score"]
+        }
+emotion = EmotionAnalysis()

model/llm.py ADDED Viewed

	@@ -0,0 +1,50 @@

+# from transformers import AutoTokenizer, pipeline, logging
+# from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+# model_name_or_path = "asyafiqe/Merak-7B-v3-Mini-Orca-Indo-GPTQ"
+# model_basename = "Merak-7B-v3-Mini-Orca-Indo-GPTQ"
+# use_triton = False
+# tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+# model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
+#         model_basename=model_basename,
+#         use_safetensors=True,
+#         trust_remote_code=True,
+#         device="cuda:0",
+#         use_triton=use_triton,
+#         quantize_config=None)
+# def predict(prompt):
+#     # prompt = "Buat rencana untuk menghemat listrik di rumah"
+#     system_message = "Anda adalah asisten AI. Anda akan diberi tugas. Anda harus menghasilkan jawaban yang rinci dan panjang.\n"
+#     prompt_template=f'''SYSTEM: {system_message}
+#     USER: {prompt}
+#     ASSISTANT: '''
+#     print("\n\n*** Generate:")
+#     input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+#     output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
+#     print(tokenizer.decode(output[0]))
+#     # Inference can also be done using transformers' pipeline
+#     # Prevent printing spurious transformers error when using pipeline with AutoGPTQ
+#     logging.set_verbosity(logging.CRITICAL)
+#     print("*** Pipeline:")
+#     pipe = pipeline(
+#         "text-generation",
+#         model=model,
+#         tokenizer=tokenizer,
+#         max_new_tokens=512,
+#         temperature=0.7,
+#         top_p=0.95,
+#         repetition_penalty=1.15
+#     )
+#     result = pipe(prompt_template)[0]['generated_text']
+#     return result

model/llm_agent.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from model.tools import llm
+from model.tools.wikipedia import wikipedia_tool
+# from model.tools.sql_tools import sql_tool
+from model.tools.predictor import word_problem_tool
+from langchain.agents.agent_types import AgentType
+from langchain.agents import initialize_agent
+class LLMAgent(object):
+    def __init__(self) -> None:
+        self.agent = initialize_agent(
+            tools=[wikipedia_tool, word_problem_tool], #sql_tool],
+            llm=llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=False,
+            handle_parsing_errors=True,
+            max_execution_time=3600,  # Set the maximum execution time (in seconds)
+            max_iterations=15  # Set the maximum number of iterations
+        )
+    def prompt(self, text):
+        result = self.agent.invoke(text)
+        return result
+llm_agent = LLMAgent()

model/model.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from ast import literal_eval
+import torch
+from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification
+from transformers import BertForSequenceClassification, BertTokenizer, BertConfig
+from math import exp
+from . import label
+class Model(object):
+    def __init__(self) -> None:
+        self.model_name = "indolem/indobert-base-uncased"
+        self.tokenizer = None
+        self.model = None
+        self.config = None
+    def load_model(self, model_name: str = None, tasks: str = None):
+        print(model_name)
+        if tasks == "emotion":
+            self.config = BertConfig.from_pretrained(model_name)
+        self.tokenizer = BertTokenizer.from_pretrained(model_name) \
+            if tasks == "emotion" else \
+            AutoTokenizer.from_pretrained(model_name)
+        if tasks == "emotion":
+            self.model = BertForSequenceClassification.from_pretrained(model_name, config=self.config)
+        elif tasks == "ner":
+            self.model = AutoModelForTokenClassification.from_pretrained(model_name)
+        else:
+            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    def predict(self, sentences, tasks: str = None):
+        encoded_input = self.tokenizer(sentences,
+                            return_tensors="pt",
+                            padding=True,
+                            truncation=True)
+        with torch.no_grad():
+            if tasks in ["emotion", "sentiment"]:
+                outputs = self.model(**encoded_input)
+                predicted_class = torch.argmax(outputs.logits, dim=1).item()
+                logits = outputs.logits.numpy()
+                probability = [exp(output)/(1+exp(output)) for output in logits[0]]
+            else:
+                recognizer = pipeline("token-classification", model=self.model, tokenizer=self.tokenizer)
+                outputs = recognizer(sentences)
+        if tasks in ["emotion", "sentiment"]:
+            result = {"label": label[tasks][predicted_class],
+                    "score": probability[predicted_class]}
+        elif tasks == "ner":
+            result = []
+            for output in outputs:
+                result.append(
+                    {
+                        "entity": output["entity"],
+                        "score": float(output["score"]),
+                        "index": int(output["index"]),
+                        "word": output["word"],
+                        "start": int(output["start"]),
+                        "end": int(output["end"])
+                    }
+                )
+        else:
+            result = ""
+        return result

model/ner.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.model import Model
+class NER(Model):
+    def __init__(self) -> None:
+        self.model_name = None
+        self.tasks = "ner"
+        self.load_model(model_name="syafiqfaray/indobert-model-ner", tasks=self.tasks)
+    def predict(self, sentences):
+        outputs = super().predict(sentences, self.tasks)
+        return {"result": outputs}
+name_entity = NER()

model/pos_tagging.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.model import Model
+class POSTagging(Model):
+    def __init__(self) -> None:
+        self.model_name = None
+        self.tasks = "pos-tagging"
+        self.load_model(model_name="w11wo/indonesian-roberta-base-posp-tagger", tasks=self.tasks)
+    def predict(self, sentences):
+        outputs = super().predict(sentences, self.tasks)
+        return {"result": outputs}
+pos_tagging = POSTagging()

model/sentiment.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from model.model import Model
+class SentimentAnalysis(Model):
+    def __init__(self) -> None:
+        self.model_name = None
+        self.tasks = "sentiment"
+        self.load_model(model_name="crypter70/IndoBERT-Sentiment-Analysis" , tasks=self.tasks)
+    def predict(self, sentences):
+        outputs = super().predict(sentences, self.tasks)
+        return {
+            "result": outputs["label"],
+            "score": outputs["score"]
+        }
+sentiment = SentimentAnalysis()

model/tools/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from langchain.chains import LLMChain
+from langchain.agents import Tool
+from langchain import HuggingFaceHub
+llm = HuggingFaceHub(repo_id = "microsoft/Phi-3-mini-128k-instruct")

model/tools/predictor.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from . import Tool, LLMChain, llm
+from langchain.memory import ConversationBufferMemory
+from langchain_core.messages import SystemMessage
+from langchain_core.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    MessagesPlaceholder,
+)
+system_message = "You are AI Assistant. You need to give crystal clear answer.\n"
+template_messages = [
+        SystemMessage(content=system_message),
+        MessagesPlaceholder(variable_name="chat_history"),
+        HumanMessagePromptTemplate.from_template("{text}"),
+    ]
+prompt_template = ChatPromptTemplate.from_messages(template_messages)
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+word_problem_chain = LLMChain(llm=llm, prompt=prompt_template, memory=memory)
+word_problem_tool = Tool.from_function(name="Reasoning Tool", \
+    func=word_problem_chain.run, \
+    description="Useful for when you need to answer logic-based/reasoning \
+                questions.",
+)

model/tools/sql_tools.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# from llama_index.core.tools import QueryEngineTool
+# from sqlalchemy import create_engine
+# username = "test"
+# password = "test"
+# host = "localhost"
+# port = "5432"
+# mydatabase = "database"
+# # Initialize your query engine (replace query_engine with your actual query engine)
+# pg_uri = f"postgresql+psycopg2://{username}:{password}@{host}:{port}/{mydatabase}"
+# engine = create_engine(pg_uri)
+# # Create a tool configuration
+# sql_tool = QueryEngineTool.from_defaults(
+#     engine,
+#     name="SQLTool",
+#     description="Searching from DB",
+#     return_direct=True,
+# )

model/tools/wikipedia.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from . import Tool
+from langchain_community.utilities import WikipediaAPIWrapper
+wikipedia = WikipediaAPIWrapper()
+# Wikipedia Tool
+wikipedia_tool = Tool(
+    name="Wikipedia",
+    func=wikipedia.run,
+    description="A useful tool for searching the Internet to find information on world events, issues, dates, "
+                "years, etc. Worth using for general topics. Use precise questions.",
+)

model/topic_modeling.py ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,120 @@

+accelerate==0.30.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.3.0
+attrs==23.2.0
+auto-gptq==0.7.1
+beautifulsoup4==4.12.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.6
+datasets==2.19.1
+Deprecated==1.2.14
+dill==0.3.8
+dirtyjson==1.0.8
+distro==1.9.0
+dnspython==2.6.1
+email_validator==2.1.1
+fastapi==0.111.0
+fastapi-cli==0.0.3
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.3.1
+gekko==1.1.1
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.0
+idna==3.7
+Jinja2==3.1.4
+joblib==1.4.2
+jsonpatch==1.33
+jsonpointer==2.4
+langchain==0.1.20
+langchain-community==0.0.38
+langchain-core==0.1.52
+langchain-text-splitters==0.0.1
+langsmith==0.1.57
+llama-index==0.10.36
+llama-index-agent-openai==0.2.4
+llama-index-cli==0.1.12
+llama-index-core==0.10.36
+llama-index-embeddings-openai==0.1.9
+llama-index-indices-managed-llama-cloud==0.1.6
+llama-index-legacy==0.9.48
+llama-index-llms-openai==0.1.18
+llama-index-multi-modal-llms-openai==0.1.5
+llama-index-program-openai==0.1.6
+llama-index-question-gen-openai==0.1.3
+llama-index-readers-file==0.1.22
+llama-index-readers-llama-parse==0.1.4
+llama-parse==0.4.2
+llamaindex-py-client==0.1.19
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+networkx==3.3
+nltk==3.8.1
+numpy==1.26.4
+openai==1.28.1
+orjson==3.10.3
+packaging==23.2
+pandas==2.2.2
+peft==0.10.0
+pillow==10.3.0
+psutil==5.9.8
+pyarrow==16.0.0
+pyarrow-hotfix==0.6
+pydantic==2.7.1
+pydantic_core==2.18.2
+Pygments==2.18.0
+pypdf==4.2.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+regex==2024.5.10
+requests==2.31.0
+rich==13.7.1
+rouge==1.0.1
+safetensors==0.4.3
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.30
+starlette==0.37.2
+striprtf==0.0.26
+sympy==1.12
+tenacity==8.3.0
+tiktoken==0.6.0
+tokenizers==0.19.1
+torch==2.3.0
+tqdm==4.66.4
+transformers==4.40.2
+typer==0.12.3
+typing-inspect==0.9.0
+typing_extensions==4.11.0
+tzdata==2024.1
+ujson==5.9.0
+urllib3==2.2.1
+uvicorn==0.29.0
+uvloop==0.19.0
+watchfiles==0.21.0
+websockets==12.0
+wikipedia==1.4.0
+wrapt==1.16.0
+xxhash==3.4.1
+yarl==1.9.4

routers/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from .emotion import emotion_analysis
+from .ner import ner
+from .prompt import llm_prompt
+from .sentiment import sentiment_analysis
+from .pos_tagging import pos
+from .topic_modeling import topic_modeling
+from fastapi import APIRouter
+router = APIRouter()
+router.add_api_route("/emotion-analysis", emotion_analysis.predict, methods=["POST"])
+router.add_api_route("/ner", ner.predict, methods=["POST"])
+router.add_api_route("/pos-tagging", pos.predict, methods=["POST"])
+router.add_api_route("/prompt", llm_prompt.prompt, methods=["POST"])
+router.add_api_route("/sentiment-analysis", sentiment_analysis.predict, methods=["POST"])
+# router.add_api_route("/topic-modeling", topic_modeling.predict, methods=["POST"])

routers/emotion.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.emotion import emotion
+class EmotionAnalysis(object):
+    def __init__(self):
+        pass
+    def predict(self, req: dict):
+        text = req.get("text")
+        result = emotion.predict(text)
+        return result
+emotion_analysis = EmotionAnalysis()

routers/ner.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.ner import name_entity
+class NER(object):
+    def __init__(self):
+        pass
+    def predict(self, req: dict):
+        text = req.get("text")
+        result = name_entity.predict(text)
+        return result
+ner = NER()

routers/pos_tagging.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.pos_tagging import pos_tagging
+class POSTagging(object):
+    def __init__(self):
+        pass
+    def predict(self, req: dict):
+        text = req.get("text")
+        result = pos.predict(text)
+        return result
+pos = POSTagging()

routers/prompt.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from model.llm_agent import llm_agent
+class LLMPrompt(object):
+    def __init__(self):
+        pass
+    def prompt(self, req: dict):
+        text = req.get("text")
+        result = llm_agent.prompt(text)
+        return {"result": result}
+llm_prompt = LLMPrompt()

routers/sentiment.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from model.sentiment import sentiment
+class SentimentAnalysis(object):
+    def __init__(self):
+        pass
+    def predict(self, req: dict):
+        text = req.get("text")
+        result = sentiment.predict(text)
+        return result
+sentiment_analysis = SentimentAnalysis()

routers/topic_modeling.py ADDED Viewed

	@@ -0,0 +1,10 @@

+class TopicModeling(object):
+    def __init__(self):
+        pass
+    def predict(self, req: dict):
+        text = req.get("text")
+        return {"result": "Jakarta", "score": 0.0}
+topic_modeling = TopicModeling()