Delete handler.py
handler.py  +0 -106
DELETED
@@ -1,106 +0,0 @@
-import os
-from llama_cpp import Llama
-
-class EndpointHandler:
-    def __init__(self, model_dir):
-        """
-        Initialize the handler with the GGUF model directory.
-        """
-        self.model_dir = model_dir
-        self.model_path = os.path.join(model_dir, "ogai-8x7b-q4_k_m.gguf")  # Ensure GGUF model path is correct
-        self.context_window = 32768  # Supports up to 32K tokens for RAG jobs
-        self.max_new_tokens = 4086  # Limit token generation
-
-        # System Instructions for OGAI-8x7b
-        self.system_instructions = """
-        You are OGAI-8x7b, a specialized large language model trained for the oil and gas industry within the Upstrima AI ecosystem.
-        Your primary purpose is to assist engineers, technicians, analysts, and decision-makers with domain-specific knowledge,
-        calculations, and insights related to oil and gas operations. You must follow Upstrima's formatting, technical accuracy,
-        and safety prioritization in all responses.
-
-        **Core Capabilities:**
-        1. **Domain Expertise** – Upstream, Midstream, Downstream, Regulatory Compliance
-        2. **Engineering Calculations** – Production Optimization, Equipment Sizing, Troubleshooting
-        3. **Data Analysis** – Well Logs, Reservoir Data, Anomaly Detection, Risk Assessment
-        4. **Documentation** – Technical Reports, Regulatory Submissions, Training Materials
-
-        **Response Standards:**
-        - Use Markdown formatting for structured responses
-        - Show calculations with LaTeX equations
-        - Prioritize safety, compliance, and technical accuracy
-        """
-
-        # Greeting system response
-        self.greeting_response = "**Welcome to Upstrima AI!** 🚀\n\nI am OGAI-8x7b, your specialized assistant for oil & gas engineering. How can I assist you today?"
-
-        # Load GGUF model with memory optimization
-        self.load_model()
-
-    def load_model(self):
-        """
-        Load the GGUF model using llama-cpp-python.
-        Automatically offloads layers to GPU if available.
-        """
-        try:
-            self.model = Llama(
-                model_path=self.model_path,
-                n_ctx=self.context_window,  # Supports 32K tokens for RAG
-                n_gpu_layers=20,  # Offload 20 layers to GPU (adjust based on available memory)
-                verbose=False
-            )
-        except Exception as e:
-            raise RuntimeError(f"Failed to load model: {e}")
-
-    def detect_greeting(self, user_input):
-        """
-        Check if the user input is a greeting.
-        """
-        greetings = ["hi", "hello", "hey", "good morning", "good afternoon", "good evening", "greetings"]
-        return user_input.lower().strip() in greetings
-
-    def process_prompt(self, user_input):
-        """
-        Handles user input, ensuring structured responses.
-        If a greeting is detected, the system responds automatically before the LLM processes further requests.
-        """
-        if self.detect_greeting(user_input):
-            return f"{self.greeting_response}\n\n"  # Send a predefined system message
-
-        return f"{self.system_instructions}\n\nUser Query: {user_input}\n\nResponse:"
-
-    def __call__(self, payload):
-        """
-        Handles inference requests, supporting both direct text input and RAG queries.
-        """
-        if not hasattr(self, "model"):
-            return {"error": "Model is not loaded"}
-
-        # Extract input
-        if isinstance(payload, str):
-            prompt = payload
-        elif isinstance(payload, dict):
-            prompt = payload.get("inputs", "")
-            if not prompt:
-                return {"error": "Missing 'inputs' key in the payload"}
-        else:
-            return {"error": "Payload must be a string or dictionary"}
-
-        # Detect greetings and generate a system response if applicable
-        if self.detect_greeting(prompt):
-            return {"generated_text": self.greeting_response}
-
-        # Ensure system instructions are prepended for normal queries
-        full_prompt = self.process_prompt(prompt)
-
-        # Generate response using GGUF model
-        try:
-            output = self.model(
-                full_prompt,
-                max_tokens=self.max_new_tokens,
-                temperature=0.7,
-                top_p=0.9
-            )
-        except Exception as e:
-            return {"error": f"Inference error: {e}"}
-
-        return {"generated_text": output["choices"][0]["text"].strip()}
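For context, a minimal sketch of how the deleted handler was invoked: it accepted either a raw string or a dict with an "inputs" key, short-circuited greetings with a canned response, and otherwise prepended the system instructions before generation. The model directory path and queries below are illustrative assumptions, not part of this commit.

# Hypothetical usage of the deleted EndpointHandler (sketch only;
# assumes handler.py and the GGUF file exist under the given directory).
from handler import EndpointHandler

handler = EndpointHandler("/repository")  # directory path is an assumption

# Greeting short-circuit: returns the canned welcome message, no inference.
print(handler("hello"))

# Normal query: system instructions are prepended, then the GGUF model generates.
result = handler({"inputs": "Summarize key safety checks for wellhead startup."})
print(result.get("generated_text", result))  # falls back to the error dict if any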