"""Minimal local RAG example: index a text file with LlamaIndex and query it via a local Ollama model."""
import os

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama


def setup_environment():
    """Pick the Ollama model; an externally exported LLM_MODEL wins over the default."""
    os.environ.setdefault('LLM_MODEL', 'tinyllama')


def initialize_settings():
    """Register the LLM, embedding model, prompt limits, and chunking with the global Settings."""
    llm_model = os.environ['LLM_MODEL']
    # request_timeout raised because small local models can be slow on first load.
    llm = Ollama(model=llm_model, request_timeout=120.0)
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # Keep prompts within tinyllama's 2048-token context window.
    prompt_helper = PromptHelper(context_window=2048)
    # Chunk documents into ~300-token pieces with a 20-token overlap.
    node_parser = SentenceSplitter(chunk_size=300, chunk_overlap=20)

    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.prompt_helper = prompt_helper
    Settings.node_parser = node_parser
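
# Note: Settings is a process-wide default. For a one-off override, LlamaIndex
# also accepts components directly, e.g. (an illustrative call, not used below):
#   VectorStoreIndex.from_documents(docs, embed_model=embed_model)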


def load_and_index_data(file_path):
    """Load a single file and build an in-memory vector index over its chunks."""
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    return VectorStoreIndex.from_documents(documents)
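
# Optional: persist the vector index between runs instead of re-embedding every
# time. A sketch only; the "./storage" path is an assumption, not part of this script:
#   index.storage_context.persist(persist_dir="./storage")
# It can later be reloaded with StorageContext.from_defaults(persist_dir=...) and
# load_index_from_storage, both from llama_index.core.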


def query_data(query_engine, query):
    """Send a natural-language query through the RAG query engine."""
    return query_engine.query(query)


if __name__ == "__main__":
    setup_environment()
    initialize_settings()

    # Build the index over the local text file, then open a query engine on it.
    file_path = 'data.txt'
    index = load_and_index_data(file_path)
    query_engine = index.as_query_engine()

    response = query_data(query_engine, 'Show me my calendar dates.')
    print(response)
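
# How to run, assuming the split LlamaIndex (>=0.10) packaging:
#   pip install llama-index-core llama-index-llms-ollama llama-index-embeddings-huggingface
#   ollama pull tinyllama      # an Ollama server must be running locally
# Put the text to index in ./data.txt, then run this script with Python.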