sid_racha committed
Commit · 24fdbf8
Parent(s): 5940614
modified dev
- .gitignore +5 -0
- Dockerfile +19 -0
- app.py +5 -0
- app/callbacks.py +24 -0
- app/chains.py +53 -0
- app/crud.py +23 -0
- app/data_indexing.py +150 -0
- app/database.py +12 -0
- app/main.py +87 -0
- app/models.py +28 -0
- app/prompts.py +51 -0
- app/schemas.py +19 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__/
+*/__pycache__/
+**/__pycache__/
+Test-LLM-Endpoint/
+app/set_env_vars.sh
Dockerfile
ADDED
@@ -0,0 +1,19 @@
+FROM python:3.12
+# Create a new user named 'user' with user ID 1000 and create their home directory
+RUN useradd -m -u 1000 user
+# Switch to the newly created user
+USER user
+# Add the user's local bin directory to the PATH
+ENV PATH="/home/user/.local/bin:$PATH"
+# Set the working directory in the container to /app
+WORKDIR /app
+# Copy the requirements.txt file from the host to the container
+# The --chown=user flag ensures the copied file is owned by our 'user'
+COPY --chown=user ./requirements.txt requirements.txt
+# Install the Python dependencies listed in requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the rest of the application code from the host to the container
+# Again, ensure the copied files are owned by 'user'
+COPY --chown=user . /app
+# Specify the command to run when the container starts
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
@@ -0,0 +1,5 @@
+from fastapi import FastAPI
+app = FastAPI()
+@app.get("/")
+def greet_json():
+    return {"Hello": "World!"}
app/callbacks.py
ADDED
@@ -0,0 +1,24 @@
+from typing import Dict, Any, List
+from langchain_core.callbacks import BaseCallbackHandler
+import schemas
+import crud
+
+
+class LogResponseCallback(BaseCallbackHandler):
+
+    def __init__(self, user_request: schemas.UserRequest, db):
+        super().__init__()
+        self.user_request = user_request
+        self.db = db
+
+    def on_llm_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
+        """Run when the LLM ends running."""
+        # TODO: on_llm_end is called when the LLM stops sending the response.
+        # Use the crud.add_message function to capture that response.
+        raise NotImplementedError
+
+    def on_llm_start(
+        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
+    ) -> Any:
+        for prompt in prompts:
+            print(prompt)
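The on_llm_end TODO leaves open how the response actually gets logged. Below is a minimal sketch of one possible body for it, assuming LangChain hands the callback an LLMResult and that schemas.MessageBase ends up with message, type and timestamp fields (that schema is still a TODO in app/schemas.py); crud.add_message is used the way its signature in app/crud.py suggests.

```python
# Sketch only -- not the assignment's reference solution.
from datetime import datetime
from typing import Any

from langchain_core.outputs import LLMResult

import crud
import schemas


def on_llm_end(self, outputs: LLMResult, **kwargs: Any) -> Any:
    """Run when the LLM ends running: persist the generated answer for this user."""
    # The generated text lives in the first generation of the LLMResult.
    generated_text = outputs.generations[0][0].text
    ai_message = schemas.MessageBase(  # assumed fields, see the app/schemas.py TODO
        message=generated_text,
        type="AI",
        timestamp=datetime.now(),
    )
    crud.add_message(self.db, ai_message, self.user_request.username)
```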
app/chains.py
ADDED
@@ -0,0 +1,53 @@
+import os
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.runnables import RunnablePassthrough
+import schemas
+from prompts import (
+    raw_prompt,
+    raw_prompt_formatted,
+    format_context,
+    # tokenizer
+)
+from data_indexing import DataIndexer
+
+data_indexer = DataIndexer()
+
+llm = HuggingFaceEndpoint(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    huggingfacehub_api_token=os.environ['HF_TOKEN'],
+    max_new_tokens=512,
+    # stop_sequences=[tokenizer.eos_token],
+    streaming=True,
+)
+
+simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)
+
+# TODO: create formatted_chain by piping raw_prompt_formatted and the LLM endpoint.
+formatted_chain = (raw_prompt_formatted | llm).with_types(input_type=schemas.UserQuestion)
+
+# # TODO: use history_prompt_formatted and HistoryInput to create the history_chain
+# history_chain = None
+
+# # TODO: Let's construct the standalone_chain by piping standalone_prompt_formatted with the LLM
+# standalone_chain = None
+
+# input_1 = RunnablePassthrough.assign(new_question=standalone_chain)
+# input_2 = {
+#     'context': lambda x: format_context(data_indexer.search(x['new_question'])),
+#     'standalone_question': lambda x: x['new_question']
+# }
+# input_to_rag_chain = input_1 | input_2
+
+# # TODO: use input_to_rag_chain, rag_prompt_formatted,
+# # HistoryInput and the LLM to build the rag_chain.
+# rag_chain = None
+
+# # TODO: Implement the filtered_rag_chain. It should be the
+# # same as the rag_chain but with hybrid_search = True.
+# filtered_rag_chain = None
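The commented-out chains could be wired roughly as sketched below. This assumes the pieces that are still TODOs elsewhere in this commit: history_prompt_formatted, standalone_prompt_formatted and rag_prompt_formatted in app/prompts.py, and schemas.HistoryInput in app/schemas.py.

```python
# Sketch of the remaining chains, assuming the prompts and HistoryInput schema exist.
from langchain_core.runnables import RunnablePassthrough

history_chain = (history_prompt_formatted | llm).with_types(
    input_type=schemas.HistoryInput
)

standalone_chain = (standalone_prompt_formatted | llm).with_types(
    input_type=schemas.HistoryInput
)

input_1 = RunnablePassthrough.assign(new_question=standalone_chain)
input_2 = {
    'context': lambda x: format_context(data_indexer.search(x['new_question'])),
    'standalone_question': lambda x: x['new_question'],
}
input_to_rag_chain = input_1 | input_2

rag_chain = (input_to_rag_chain | rag_prompt_formatted | llm).with_types(
    input_type=schemas.HistoryInput
)

# Same pipeline, but asking DataIndexer.search to apply its source filter.
input_2_filtered = {
    'context': lambda x: format_context(
        data_indexer.search(x['new_question'], hybrid_search=True)
    ),
    'standalone_question': lambda x: x['new_question'],
}
filtered_rag_chain = (input_1 | input_2_filtered | rag_prompt_formatted | llm).with_types(
    input_type=schemas.HistoryInput
)
```

Passing hybrid_search=True is what makes the filtered variant use the Chroma-based source filter inside DataIndexer.search.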
app/crud.py
ADDED
@@ -0,0 +1,23 @@
+from sqlalchemy.orm import Session
+import models, schemas
+
+
+def get_or_create_user(db: Session, username: str):
+    user = db.query(models.User).filter(models.User.username == username).first()
+    if not user:
+        user = models.User(username=username)
+        db.add(user)
+        db.commit()
+        db.refresh(user)
+    return user
+
+def add_message(db: Session, message: schemas.MessageBase, username: str):
+    # TODO: Implement the add_message function. It should:
+    # - get or create the user with the username
+    # - create a models.Message instance
+    # - pass the retrieved user to the message instance
+    # - save the message instance to the database
+    raise NotImplementedError
+
+def get_user_chat_history(db: Session, username: str):
+    raise NotImplementedError
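The two remaining functions could look roughly like the sketch below, which leans on get_or_create_user above and on the models.Message columns defined in app/models.py; the MessageBase field names are assumptions until app/schemas.py is filled in.

```python
# Sketch of add_message / get_user_chat_history -- one possible implementation.
from sqlalchemy.orm import Session

import models
import schemas


def add_message(db: Session, message: schemas.MessageBase, username: str):
    user = get_or_create_user(db, username)
    db_message = models.Message(
        message=message.message,   # assumed MessageBase fields
        type=message.type,
        timestamp=message.timestamp,
        user=user,
    )
    db.add(db_message)
    db.commit()
    db.refresh(db_message)
    return db_message


def get_user_chat_history(db: Session, username: str):
    user = db.query(models.User).filter(models.User.username == username).first()
    if not user:
        return []
    return (
        db.query(models.Message)
        .filter(models.Message.user_id == user.id)
        .order_by(models.Message.timestamp)
        .all()
    )
```

Ordering by timestamp lets format_chat_history in app/prompts.py render the conversation in order.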
app/data_indexing.py
ADDED
@@ -0,0 +1,150 @@
+import os
+import uuid
+from pathlib import Path
+from pinecone.grpc import PineconeGRPC as Pinecone
+from pinecone import ServerlessSpec
+from langchain_community.vectorstores import Chroma
+from langchain_openai import OpenAIEmbeddings
+
+current_dir = Path(__file__).resolve().parent
+
+
+class DataIndexer:
+
+    source_file = os.path.join(current_dir, 'sources.txt')
+
+    def __init__(self, index_name='langchain-repo') -> None:
+
+        # TODO: choose your embedding model
+        # self.embedding_client = InferenceClient(
+        #     "dunzhang/stella_en_1.5B_v5",
+        #     token=os.environ['HF_TOKEN'],
+        # )
+        self.embedding_client = OpenAIEmbeddings()
+        self.index_name = index_name
+        self.pinecone_client = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
+
+        if index_name not in self.pinecone_client.list_indexes().names():
+            # TODO: create your index if it doesn't exist. Use the create_index function.
+            # Make sure to choose the dimension that corresponds to your embedding model
+            pass
+
+        self.index = self.pinecone_client.Index(self.index_name)
+        # TODO: make sure to build the index.
+        self.source_index = None
+
+    def get_source_index(self):
+        if not os.path.isfile(self.source_file):
+            print('No source file')
+            return None
+
+        print('create source index')
+
+        with open(self.source_file, 'r') as file:
+            sources = file.readlines()
+
+        sources = [s.rstrip('\n') for s in sources]
+        vectorstore = Chroma.from_texts(
+            sources, embedding=self.embedding_client
+        )
+        return vectorstore
+
+    def index_data(self, docs, batch_size=32):
+
+        with open(self.source_file, 'a') as file:
+            for doc in docs:
+                file.writelines(doc.metadata['source'] + '\n')
+
+        for i in range(0, len(docs), batch_size):
+            batch = docs[i: i + batch_size]
+
+            # TODO: create a list of the vector representations of each text data in the batch
+            # TODO: choose your embedding model
+            # values = self.embedding_client.embed_documents([
+            #     doc.page_content for doc in batch
+            # ])
+
+            # values = self.embedding_client.feature_extraction([
+            #     doc.page_content for doc in batch
+            # ])
+            values = None
+
+            # TODO: create a list of unique identifiers for each element in the batch with the uuid package.
+            vector_ids = None
+
+            # TODO: create a list of dictionaries representing the metadata. Capture the text data
+            # with the "text" key, and make sure to capture the rest of the doc.metadata.
+            metadatas = None
+
+            # create a list of dictionaries with keys "id" (the unique identifiers), "values"
+            # (the vector representation), and "metadata" (the metadata).
+            vectors = [{
+                'id': vector_id,
+                'values': value,
+                'metadata': metadata
+            } for vector_id, value, metadata in zip(vector_ids, values, metadatas)]
+
+            try:
+                # TODO: Use the function upsert to upload the data to the database.
+                upsert_response = None
+                print(upsert_response)
+            except Exception as e:
+                print(e)
+
+    def search(self, text_query, top_k=5, hybrid_search=False):
+
+        filter = None
+        if hybrid_search and self.source_index:
+            # I implemented the filtering process to pull the 50 most relevant file names
+            # to the question. Make sure to adjust this number as you see fit.
+            source_docs = self.source_index.similarity_search(text_query, 50)
+            filter = {"source": {"$in": [doc.page_content for doc in source_docs]}}
+
+        # TODO: embed the text_query by using the embedding model
+        # TODO: choose your embedding model
+        # vector = self.embedding_client.feature_extraction(text_query)
+        # vector = self.embedding_client.embed_query(text_query)
+        vector = None
+
+        # TODO: use the vector representation of the text_query to
+        # search the database by using the query function.
+        result = None
+
+        docs = []
+        for res in result["matches"]:
+            # TODO: From the result's metadata, extract the "text" element.
+            pass
+
+        return docs
+
+
+if __name__ == '__main__':
+
+    from langchain_community.document_loaders import GitLoader
+    from langchain_text_splitters import (
+        Language,
+        RecursiveCharacterTextSplitter,
+    )
+
+    loader = GitLoader(
+        clone_url="https://github.com/langchain-ai/langchain",
+        repo_path="./code_data/langchain_repo/",
+        branch="master",
+    )
+
+    python_splitter = RecursiveCharacterTextSplitter.from_language(
+        language=Language.PYTHON, chunk_size=10000, chunk_overlap=100
+    )
+
+    docs = loader.load()
+    docs = [doc for doc in docs if doc.metadata['file_type'] in ['.py', '.md']]
+    docs = [doc for doc in docs if len(doc.page_content) < 50000]
+    docs = python_splitter.split_documents(docs)
+    for doc in docs:
+        doc.page_content = '# {}\n\n'.format(doc.metadata['source']) + doc.page_content
+
+    indexer = DataIndexer()
+    with open('/app/sources.txt', 'a') as file:
+        for doc in docs:
+            file.writelines(doc.metadata['source'] + '\n')
+    indexer.index_data(docs)
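The embedding, upsert and query TODOs could be completed along these lines, written here as free functions over a DataIndexer instance rather than as edits to the class. The sketch assumes the OpenAIEmbeddings client chosen in __init__ and a Pinecone index created with a matching dimension (1536 for the default OpenAI embedding model).

```python
# Sketch of the indexing and search steps -- not the assignment's reference solution.
import uuid


def index_batch(indexer, batch):
    """Embed a batch of LangChain documents and upsert them into Pinecone."""
    values = indexer.embedding_client.embed_documents(
        [doc.page_content for doc in batch]
    )
    vector_ids = [str(uuid.uuid4()) for _ in batch]
    metadatas = [{"text": doc.page_content, **doc.metadata} for doc in batch]
    vectors = [
        {"id": vector_id, "values": value, "metadata": metadata}
        for vector_id, value, metadata in zip(vector_ids, values, metadatas)
    ]
    return indexer.index.upsert(vectors=vectors)


def search(indexer, text_query, top_k=5, filter=None):
    """Embed the query and return the stored text of the top_k matches."""
    vector = indexer.embedding_client.embed_query(text_query)
    result = indexer.index.query(
        vector=vector,
        filter=filter,
        top_k=top_k,
        include_metadata=True,
    )
    return [match["metadata"]["text"] for match in result["matches"]]
```

Creating the index in __init__ would then be a single pinecone_client.create_index call with a dimension matching the embedding model (plus a ServerlessSpec for a serverless index), and self.source_index would be set with self.get_source_index() so that hybrid search has something to filter against.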
app/database.py
ADDED
@@ -0,0 +1,12 @@
+from sqlalchemy import create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+
+SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
+
+engine = create_engine(
+    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
+)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+Base = declarative_base()
app/main.py
ADDED
@@ -0,0 +1,87 @@
+from langchain_core.runnables import Runnable
+from langchain_core.callbacks import BaseCallbackHandler
+from fastapi import FastAPI, Request, Depends
+from sse_starlette.sse import EventSourceResponse
+from langserve.serialization import WellKnownLCSerializer
+from typing import List
+from sqlalchemy.orm import Session
+
+import schemas
+from chains import simple_chain
+import crud, models, schemas
+from database import SessionLocal, engine
+from callbacks import LogResponseCallback
+
+
+models.Base.metadata.create_all(bind=engine)
+
+app = FastAPI()
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[]):
+    for output in runnable.stream(input_data.dict(), config={"callbacks": callbacks}):
+        data = WellKnownLCSerializer().dumps(output).decode("utf-8")
+        yield {'data': data, "event": "data"}
+    yield {"event": "end"}
+
+
+@app.post("/simple/stream")
+async def simple_stream(request: Request):
+    data = await request.json()
+    user_question = schemas.UserQuestion(**data['input'])
+    return EventSourceResponse(generate_stream(user_question, simple_chain))
+
+
+@app.post("/formatted/stream")
+async def formatted_stream(request: Request):
+    # TODO: use the formatted_chain to implement the "/formatted/stream" endpoint.
+    raise NotImplementedError
+
+
+@app.post("/history/stream")
+async def history_stream(request: Request, db: Session = Depends(get_db)):
+    # TODO: Let's implement the "/history/stream" endpoint. The endpoint should follow these steps:
+    # - The endpoint receives the request
+    # - The request is parsed into a user request
+    # - The user request is used to pull the chat history of the user
+    # - We add the current question to the user history by using add_message.
+    # - We create an instance of HistoryInput by using format_chat_history.
+    # - We use the history input within the history chain.
+    raise NotImplementedError
+
+
+@app.post("/rag/stream")
+async def rag_stream(request: Request, db: Session = Depends(get_db)):
+    # TODO: Let's implement the "/rag/stream" endpoint. The endpoint should follow these steps:
+    # - The endpoint receives the request
+    # - The request is parsed into a user request
+    # - The user request is used to pull the chat history of the user
+    # - We add the current question to the user history by using add_message.
+    # - We create an instance of HistoryInput by using format_chat_history.
+    # - We use the history input within the rag chain.
+    raise NotImplementedError
+
+
+@app.post("/filtered_rag/stream")
+async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):
+    # TODO: Let's implement the "/filtered_rag/stream" endpoint. The endpoint should follow these steps:
+    # - The endpoint receives the request
+    # - The request is parsed into a user request
+    # - The user request is used to pull the chat history of the user
+    # - We add the current question to the user history by using add_message.
+    # - We create an instance of HistoryInput by using format_chat_history.
+    # - We use the history input within the filtered rag chain.
+    raise NotImplementedError
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("main:app", host="localhost", reload=True, port=8000)
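A sketch of how the "/formatted/stream" and "/history/stream" endpoints could be implemented follows, assuming main.py also imports formatted_chain and history_chain from chains and format_chat_history from prompts, and that the HistoryInput / UserRequest / MessageBase schemas get filled in as their TODOs describe.

```python
# Sketch of two of the TODO endpoints -- the schema shapes are assumptions.
from datetime import datetime

from chains import formatted_chain, history_chain  # assumed to exist by then
from prompts import format_chat_history


@app.post("/formatted/stream")
async def formatted_stream(request: Request):
    data = await request.json()
    user_question = schemas.UserQuestion(**data['input'])
    return EventSourceResponse(generate_stream(user_question, formatted_chain))


@app.post("/history/stream")
async def history_stream(request: Request, db: Session = Depends(get_db)):
    data = await request.json()
    user_request = schemas.UserRequest(**data['input'])
    # Pull the existing history, then log the new question as a "Human" message.
    chat_history = crud.get_user_chat_history(db, user_request.username)
    crud.add_message(
        db,
        schemas.MessageBase(
            message=user_request.question,
            type="Human",
            timestamp=datetime.now(),
        ),
        user_request.username,
    )
    history_input = schemas.HistoryInput(
        chat_history=format_chat_history(chat_history),
        question=user_request.question,
    )
    # The callback logs the AI answer once streaming finishes.
    return EventSourceResponse(generate_stream(
        history_input,
        history_chain,
        [LogResponseCallback(user_request, db)],
    ))
```

The /rag/stream and /filtered_rag/stream endpoints would follow the same pattern with rag_chain and filtered_rag_chain.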
app/models.py
ADDED
@@ -0,0 +1,28 @@
+from sqlalchemy import Column, ForeignKey, Integer, String, DateTime
+from sqlalchemy.orm import relationship
+
+from database import Base
+
+class User(Base):
+    __tablename__ = "users"
+
+    id = Column(Integer, primary_key=True, index=True)
+    username = Column(String, unique=True, index=True)
+    messages = relationship("Message", back_populates="user")
+
+# TODO: Implement the Message SQLAlchemy model. Message should have a primary key,
+# a message attribute to store the content of messages, a type ("AI" or "Human")
+# depending on whether it is a user question or an AI response, a timestamp to
+# order messages by time, and a user attribute to get the user instance associated
+# with the message. We also need a user_id that uses the User.id
+# attribute as a foreign key.
+class Message(Base):
+    __tablename__ = "messages"
+
+    id = Column(Integer, primary_key=True, index=True)
+    message = Column(String, index=True)
+    type = Column(String)  # "AI" or "Human"
+    timestamp = Column(DateTime, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"))
+
+    user = relationship("User", back_populates="messages")
app/prompts.py
ADDED
@@ -0,0 +1,51 @@
+from langchain_core.prompts import PromptTemplate
+from typing import List
+import models
+
+
+def format_prompt(prompt) -> PromptTemplate:
+    # TODO: format the input prompt by using the model-specific instruction template
+    # TODO: return a langchain PromptTemplate
+    return PromptTemplate.from_template(prompt)
+
+def format_chat_history(messages: List[models.Message]):
+    # TODO: implement format_chat_history to format
+    # the list of Message into a text of chat history.
+    raise NotImplementedError
+
+def format_context(docs: List[str]):
+    # TODO: the output of DataIndexer.search is a list of strings, so we need
+    # to concatenate that list into a single text that can fit into the
+    # rag_prompt_formatted. Implement format_context so that it takes a
+    # list of strings and returns the context as one string.
+    raise NotImplementedError
+
+raw_prompt = "{question}"
+
+# TODO: Create the history_prompt prompt that will capture the question and the conversation history.
+# The history_prompt needs a {chat_history} placeholder and a {question} placeholder.
+history_prompt: str = None
+
+# TODO: Create the standalone_prompt prompt that will capture the question and the chat history
+# to generate a standalone question. It needs a {chat_history} placeholder and a {question} placeholder.
+standalone_prompt: str = None
+
+# TODO: Create the rag_prompt that will capture the context and the standalone question to generate
+# a final answer to the question.
+rag_prompt: str = None
+
+# TODO: create raw_prompt_formatted by using format_prompt
+raw_prompt_formatted = None
+raw_prompt = PromptTemplate.from_template(raw_prompt)
+
+# TODO: use format_prompt to create history_prompt_formatted
+history_prompt_formatted: PromptTemplate = None
+# TODO: use format_prompt to create standalone_prompt_formatted
+standalone_prompt_formatted: PromptTemplate = None
+# TODO: use format_prompt to create rag_prompt_formatted
+rag_prompt_formatted: PromptTemplate = None
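One way the prompt TODOs could be filled in is sketched below. The Llama 3.1 instruct template is an assumption based on the model chosen in app/chains.py and should be checked against the model's tokenizer; the prompt wordings are illustrative only.

```python
# Sketch of possible completions for app/prompts.py -- wording and template are assumptions.
from typing import List

from langchain_core.prompts import PromptTemplate

import models

# Assumed chat template for meta-llama/Llama-3.1-8B-Instruct; verify against the tokenizer.
LLAMA_INSTRUCT_TEMPLATE = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
    "{instruction}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)


def format_prompt(prompt: str) -> PromptTemplate:
    """Wrap a raw prompt in the model-specific instruction template."""
    return PromptTemplate.from_template(
        LLAMA_INSTRUCT_TEMPLATE.format(instruction=prompt)
    )


def format_chat_history(messages: List[models.Message]) -> str:
    """Flatten stored messages into a readable transcript."""
    return "\n".join(
        "{}: {}".format(message.type, message.message) for message in messages
    )


def format_context(docs: List[str]) -> str:
    """Concatenate retrieved chunks into a single context string."""
    return "\n\n".join(docs)


history_prompt = (
    "Given the following conversation and a follow up question, answer the question.\n\n"
    "{chat_history}\n\nQuestion: {question}"
)

standalone_prompt = (
    "Given the following conversation and a follow up question, rephrase the "
    "follow up question to be a standalone question.\n\n"
    "Chat history: {chat_history}\n\nFollow up question: {question}\n\n"
    "Standalone question:"
)

rag_prompt = (
    "Answer the question based only on the following context:\n\n"
    "{context}\n\nQuestion: {standalone_question}"
)

# In prompts.py these would then be wired up before raw_prompt is converted:
# raw_prompt_formatted = format_prompt(raw_prompt)
# history_prompt_formatted = format_prompt(history_prompt)
# standalone_prompt_formatted = format_prompt(standalone_prompt)
# rag_prompt_formatted = format_prompt(rag_prompt)
```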
app/schemas.py
ADDED
@@ -0,0 +1,19 @@
+from pydantic.v1 import BaseModel
+
+
+class UserQuestion(BaseModel):
+    question: str
+
+# TODO: create a HistoryInput data model with chat_history and question attributes.
+class HistoryInput(BaseModel):
+    pass
+
+# TODO: let's create a UserRequest data model with question and username attributes.
+# This will be used to parse the input request.
+class UserRequest(BaseModel):
+    username: str
+
+# TODO: implement MessageBase as a schema mapping from the database model to the
+# FastAPI data model. Basically MessageBase should have the same attributes as models.Message.
+class MessageBase(BaseModel):
+    pass
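The three TODO schemas could end up looking like the sketch below, mirroring the models.Message columns from app/models.py; the exact field names are assumptions until the rest of the app settles on them.

```python
# Sketch of the TODO schemas -- field names are assumptions.
from datetime import datetime

from pydantic.v1 import BaseModel


class HistoryInput(BaseModel):
    chat_history: str
    question: str


class UserRequest(BaseModel):
    username: str
    question: str


class MessageBase(BaseModel):
    message: str
    type: str          # "AI" or "Human"
    timestamp: datetime
```

A MessageBase shaped like this is what both crud.add_message and LogResponseCallback would rely on.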
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+fastapi
+uvicorn[standard]
+langchain-huggingface==0.2.0