tommytracx committed
Commit 125d97a · verified · 1 Parent(s): 81e48d9

Delete handler.py

Files changed (1)
  1. handler.py +0 -106
handler.py DELETED
@@ -1,106 +0,0 @@
- import os
- from llama_cpp import Llama
-
- class EndpointHandler:
-     def __init__(self, model_dir):
-         """
-         Initialize the handler with the GGUF model directory.
-         """
-         self.model_dir = model_dir
-         self.model_path = os.path.join(model_dir, "ogai-8x7b-q4_k_m.gguf")  # Ensure GGUF model path is correct
-         self.context_window = 32768  # Supports up to 32K tokens for RAG jobs
-         self.max_new_tokens = 4086  # Limit token generation
-
-         # System Instructions for OGAI-8x7b
-         self.system_instructions = """
-         You are OGAI-8x7b, a specialized large language model trained for the oil and gas industry within the Upstrima AI ecosystem.
-         Your primary purpose is to assist engineers, technicians, analysts, and decision-makers with domain-specific knowledge,
-         calculations, and insights related to oil and gas operations. You must follow Upstrima's formatting, technical accuracy,
-         and safety prioritization in all responses.
-
-         **Core Capabilities:**
-         1. **Domain Expertise** – Upstream, Midstream, Downstream, Regulatory Compliance
-         2. **Engineering Calculations** – Production Optimization, Equipment Sizing, Troubleshooting
-         3. **Data Analysis** – Well Logs, Reservoir Data, Anomaly Detection, Risk Assessment
-         4. **Documentation** – Technical Reports, Regulatory Submissions, Training Materials
-
-         **Response Standards:**
-         - Use Markdown formatting for structured responses
-         - Show calculations with LaTeX equations
-         - Prioritize safety, compliance, and technical accuracy
-         """
-
-         # Greeting system response
-         self.greeting_response = "**Welcome to Upstrima AI!** 🚀\n\nI am OGAI-8x7b, your specialized assistant for oil & gas engineering. How can I assist you today?"
-
-         # Load GGUF model with memory optimization
-         self.load_model()
-
-     def load_model(self):
-         """
-         Load the GGUF model using llama-cpp-python.
-         Automatically offloads layers to GPU if available.
-         """
-         try:
-             self.model = Llama(
-                 model_path=self.model_path,
-                 n_ctx=self.context_window,  # Supports 32K tokens for RAG
-                 n_gpu_layers=20,  # Offload 20 layers to GPU (adjust based on available memory)
-                 verbose=False
-             )
-         except Exception as e:
-             raise RuntimeError(f"Failed to load model: {e}")
-
-     def detect_greeting(self, user_input):
-         """
-         Check if the user input is a greeting.
-         """
-         greetings = ["hi", "hello", "hey", "good morning", "good afternoon", "good evening", "greetings"]
-         return user_input.lower().strip() in greetings
-
-     def process_prompt(self, user_input):
-         """
-         Handles user input, ensuring structured responses.
-         If a greeting is detected, the system responds automatically before the LLM processes further requests.
-         """
-         if self.detect_greeting(user_input):
-             return f"{self.greeting_response}\n\n"  # Send a predefined system message
-
-         return f"{self.system_instructions}\n\nUser Query: {user_input}\n\nResponse:"
-
-     def __call__(self, payload):
-         """
-         Handles inference requests, supporting both direct text input and RAG queries.
-         """
-         if not hasattr(self, "model"):
-             return {"error": "Model is not loaded"}
-
-         # Extract input
-         if isinstance(payload, str):
-             prompt = payload
-         elif isinstance(payload, dict):
-             prompt = payload.get("inputs", "")
-             if not prompt:
-                 return {"error": "Missing 'inputs' key in the payload"}
-         else:
-             return {"error": "Payload must be a string or dictionary"}
-
-         # Detect greetings and generate a system response if applicable
-         if self.detect_greeting(prompt):
-             return {"generated_text": self.greeting_response}
-
-         # Ensure system instructions are prepended for normal queries
-         full_prompt = self.process_prompt(prompt)
-
-         # Generate response using GGUF model
-         try:
-             output = self.model(
-                 full_prompt,
-                 max_tokens=self.max_new_tokens,
-                 temperature=0.7,
-                 top_p=0.9
-             )
-         except Exception as e:
-             return {"error": f"Inference error: {e}"}
-
-         return {"generated_text": output["choices"][0]["text"].strip()}