Spaces:

HeheBoi0769
/

Nexus_NLP_model

Sleeping

App Files Files Community

Krish Patel committed on Feb 12

Commit

94a65e4

1 Parent(s): 898162d

Added gemini analysis and knowledge graph

Browse files

Files changed (6) hide show

.gitignore +1 -0
app.py +90 -196
final.py +418 -48
knowledge_graph_final.pkl +3 -0
prev_final.py +142 -0
test.py +48 -11

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

app.py CHANGED Viewed

@@ -1,14 +1,9 @@
 # import streamlit as st
 # import torch
 # from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # # Load the model and tokenizer
-# # @st.cache_resource
-# # def load_model():
-# #     tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
-# #     model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
-# #     model.eval()
-# #     return tokenizer, model
 # @st.cache_resource
 # def load_model():
 #     tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
@@ -25,202 +20,101 @@
 #     confidence = probabilities[0][predicted_label].item()
 #     return "FAKE" if predicted_label == 1 else "REAL", confidence
-# def main():
-#     st.title("News Classifier")
-#     # Load model
 #     tokenizer, model = load_model()
-#     # Text input
-#     news_text = st.text_area("Enter news text to analyze:", height=200)
-#     if st.button("Classify"):
-#         if news_text:
-#             with st.spinner('Analyzing...'):
-#                 prediction, confidence = predict_news(news_text, tokenizer, model)
-#                 # Display results
-#                 if prediction == "FAKE":
-#                     st.error(f"⚠️ {prediction} NEWS")
-#                 else:
-#                     st.success(f"✅ {prediction} NEWS")
-#                 st.info(f"Confidence: {confidence*100:.2f}%")
-# if __name__ == "__main__":
-#     main()
-# # import streamlit as st
-# # import torch
-# # from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# # from fastapi import FastAPI, Request
-# # from pydantic import BaseModel
-# # from threading import Thread
-# # from streamlit.web import cli
-# # # FastAPI app
-# # api_app = FastAPI()
-# # # Load the model and tokenizer
-# # @st.cache_resource
-# # def load_model():
-# #     tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
-# #     model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
-# #     model.eval()
-# #     return tokenizer, model
-# # # Prediction function
-# # def predict_news(text, tokenizer, model):
-# #     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-# #     with torch.no_grad():
-# #         outputs = model(**inputs)
-# #     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-# #     predicted_label = torch.argmax(probabilities, dim=-1).item()
-# #     confidence = probabilities[0][predicted_label].item()
-# #     return "FAKE" if predicted_label == 1 else "REAL", confidence
-# # # FastAPI request model
-# # class NewsInput(BaseModel):
-# #     text: str
-# # # FastAPI route for POST requests
-# # @api_app.post("/classify")
-# # async def classify_news(data: NewsInput):
-# #     tokenizer, model = load_model()
-# #     prediction, confidence = predict_news(data.text, tokenizer, model)
-# #     return {
-# #         "prediction": prediction,
-# #         "confidence": f"{confidence*100:.2f}%"
-# #     }
-# # # Streamlit app
-# # def run_streamlit():
-# #     def main():
-# #         st.title("News Classifier")
-# #         # Load model
-# #         tokenizer, model = load_model()
-# #         # Text input
-# #         news_text = st.text_area("Enter news text to analyze:", height=200)
-# #         if st.button("Classify"):
-# #             if news_text:
-# #                 with st.spinner('Analyzing...'):
-# #                     prediction, confidence = predict_news(news_text, tokenizer, model)
-# #                     # Display results
-# #                     if prediction == "FAKE":
-# #                         st.error(f"⚠️ {prediction} NEWS")
-# #                     else:
-# #                         st.success(f"✅ {prediction} NEWS")
-# #                     st.info(f"Confidence: {confidence*100:.2f}%")
-# #     main()
-# # # Threaded execution for FastAPI and Streamlit
-# # def start_fastapi():
-# #     import uvicorn
-# #     uvicorn.run(api_app, host="0.0.0.0", port=8502)
-# # if __name__ == "__main__":
-# #     fastapi_thread = Thread(target=start_fastapi, daemon=True)
-# #     fastapi_thread.start()
-# #     # Start Streamlit
-# #     cli.main()
-# # # from fastapi import FastAPI, HTTPException
-# # # from pydantic import BaseModel
-# # # from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# # # import torch
-# # # from fastapi.middleware.cors import CORSMiddleware
-# # # # Define the FastAPI app
-# # # app = FastAPI()
-# # # app.add_middleware(
-# # #     CORSMiddleware,
-# # #     allow_origins=["*"],  # Update with your frontend's URL for security
-# # #     allow_credentials=True,
-# # #     allow_methods=["*"],
-# # #     allow_headers=["*"],
-# # # )
-# # # # Define the input data schema
-# # # class InputText(BaseModel):
-# # #     text: str
-# # # # Load the model and tokenizer (ensure these paths are correct in your Space)
-# # # tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
-# # # model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
-# # # model.eval()
-# # # # Prediction function
-# # # def predict_news(text: str):
-# # #     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-# # #     with torch.no_grad():
-# # #         outputs = model(**inputs)
-# # #     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-# # #     predicted_label = torch.argmax(probabilities, dim=-1).item()
-# # #     confidence = probabilities[0][predicted_label].item()
-# # #     return {
-# # #         "prediction": "FAKE" if predicted_label == 1 else "REAL",
-# # #         "confidence": round(confidence * 100, 2)  # Return confidence as a percentage
-# # #     }
-# # # # Define the POST endpoint
-# # # @app.post("/predict")
-# # # async def classify_news(input_text: InputText):
-# # #     try:
-# # #         result = predict_news(input_text.text)
-# # #         return result
-# # #     except Exception as e:
-# # #         raise HTTPException(status_code=500, detail=str(e))
 import streamlit as st
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import json
-# Load the model and tokenizer
 @st.cache_resource
-def load_model():
-    tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
-    model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
-    model.eval()
-    return tokenizer, model
-def predict_news(text, tokenizer, model):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-    predicted_label = torch.argmax(probabilities, dim=-1).item()
-    confidence = probabilities[0][predicted_label].item()
-    return "FAKE" if predicted_label == 1 else "REAL", confidence
 # Streamlit UI
-st.title("News Classifier API")
-# If running as an API, get the request from query parameters
-query_params = st.query_params
-if "text" in query_params:
-    text_input = query_params["text"][0]  # Get text input from URL query
-    tokenizer, model = load_model()
-    prediction, confidence = predict_news(text_input, tokenizer, model)
-    # Return JSON response
-    st.json({"prediction": prediction, "confidence": confidence})
-# If running in UI mode, show text input
-else:
-    text_input = st.text_area("Enter news text:")
-    if st.button("Classify"):
-        tokenizer, model = load_model()
-        prediction, confidence = predict_news(text_input, tokenizer, model)
-        st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")

 # import streamlit as st
 # import torch
 # from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# import json
 # # Load the model and tokenizer
 # @st.cache_resource
 # def load_model():
 #     tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
 #     confidence = probabilities[0][predicted_label].item()
 #     return "FAKE" if predicted_label == 1 else "REAL", confidence
+# # Streamlit UI
+# st.title("News Classifier API")
+# # If running as an API, get the request from query parameters
+# query_params = st.query_params
+# if "text" in query_params:
+#     text_input = query_params["text"][0]  # Get text input from URL query
 #     tokenizer, model = load_model()
+#     prediction, confidence = predict_news(text_input, tokenizer, model)
+#     # Return JSON response
+#     st.json({"prediction": prediction, "confidence": confidence})
+# # If running in UI mode, show text input
+# else:
+#     text_input = st.text_area("Enter news text:")
+#     if st.button("Classify"):
+#         tokenizer, model = load_model()
+#         prediction, confidence = predict_news(text_input, tokenizer, model)
+#         st.write(f"Prediction: {prediction} (Confidence: {confidence*100:.2f}%)")
 import streamlit as st
+from final import *
+import pandas as pd
+# Page configuration
+st.set_page_config(
+    page_title="Nexus NLP News Classifier",
+    page_icon="📰",
+    layout="wide"
+)
+# Cache model loading
 @st.cache_resource
+def initialize_models():
+    """Load every heavyweight resource once and hand them back together.
+
+    final.py exposes ``load_nlp()`` and ``load_model()``; it defines no
+    ``load_models()`` helper, so the two loaders are called individually.
+
+    Returns:
+        Tuple ``(nlp, tokenizer, model, knowledge_graph)``.
+    """
+    nlp = load_nlp()
+    tokenizer, model = load_model()
+    knowledge_graph = load_knowledge_graph()
+    return nlp, tokenizer, model, knowledge_graph
+# Initialize all models
+nlp, tokenizer, model, knowledge_graph = initialize_models()
 # Streamlit UI
+def main():
+    """Render the classifier page and run all three analyses on demand.
+
+    The helpers imported from final.py accept only the article text (plus
+    ``is_real`` for the graph update) and rely on final.py's own module-level
+    spaCy pipeline, tokenizer, classifier and knowledge graph, so no
+    resources are passed to them here.
+    """
+    st.title("📰 Nexus NLP News Classifier")
+    st.write("Enter news text below to analyze its authenticity")
+    # Text input area
+    news_text = st.text_area("News Text", height=200)
+    if st.button("Analyze"):
+        if news_text:
+            with st.spinner("Analyzing..."):
+                # Get predictions from all models
+                ml_prediction, ml_confidence = predict_with_model(news_text)
+                # Scored before the update below folds this article into the graph.
+                kg_prediction, kg_confidence = predict_with_knowledge_graph(news_text)
+                # Update knowledge graph
+                update_knowledge_graph(news_text, ml_prediction == "REAL")
+                # Get Gemini analysis
+                gemini_model = setup_gemini()
+                gemini_result = analyze_content_gemini(gemini_model, news_text)
+                # Display results in columns
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.subheader("ML Model Analysis")
+                    st.metric("Prediction", ml_prediction)
+                    st.metric("Confidence", f"{ml_confidence:.2f}%")
+                with col2:
+                    st.subheader("Knowledge Graph Analysis")
+                    st.metric("Prediction", kg_prediction)
+                    st.metric("Confidence", f"{kg_confidence:.2f}%")
+                with col3:
+                    st.subheader("Gemini Analysis")
+                    gemini_pred = gemini_result["gemini_analysis"]["predicted_classification"]
+                    gemini_conf = gemini_result["gemini_analysis"]["confidence_score"]
+                    st.metric("Prediction", gemini_pred)
+                    st.metric("Confidence", f"{gemini_conf}%")
+                # Detailed analysis sections
+                with st.expander("View Detailed Analysis"):
+                    st.json(gemini_result)
+                with st.expander("Named Entities"):
+                    entities = extract_entities(news_text)
+                    df = pd.DataFrame(entities, columns=["Entity", "Type"])
+                    st.dataframe(df)
+        else:
+            st.warning("Please enter some text to analyze")
+if __name__ == "__main__":
+    main()

final.py CHANGED Viewed

@@ -1,50 +1,396 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import spacy
 import google.generativeai as genai
 import json
 import os
 import dotenv
 dotenv.load_dotenv()
-# Load spaCy for NER
-nlp = spacy.load("en_core_web_sm")
-# Load the trained ML model
-model_path = "./results/checkpoint-753"  # Replace with the actual path to your model
-# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
-# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
-from transformers import DebertaV2Tokenizer
-tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
-model = AutoModelForSequenceClassification.from_pretrained(model_path)
-model.eval()
 def setup_gemini():
     genai.configure(api_key=os.getenv("GEMINI_API"))
     model = genai.GenerativeModel('gemini-pro')
     return model
 def predict_with_model(text):
-    """Predict whether the news is real or fake using the ML model."""
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
     predicted_label = torch.argmax(probabilities, dim=-1).item()
-    return "FAKE" if predicted_label == 1 else "REAL"
 def extract_entities(text):
-    """Extract named entities from text using spaCy."""
     doc = nlp(text)
     entities = [(ent.text, ent.label_) for ent in doc.ents]
     return entities
-def predict_news(text):
-    """Predict whether the news is real or fake using the ML model."""
-    # Predict with the ML model
-    prediction = predict_with_model(text)
-    return prediction
 def analyze_content_gemini(model, text):
     prompt = f"""Analyze this news text and return a JSON object with the following structure:
@@ -106,37 +452,61 @@ def analyze_content_gemini(model, text):
             }
         }
-def clean_gemini_output(text):
-    """Remove markdown formatting from Gemini output"""
-    text = text.replace('##', '')
-    text = text.replace('**', '')
-    return text
-def get_gemini_analysis(text):
-    """Get detailed content analysis from Gemini."""
-    gemini_model = setup_gemini()
-    gemini_analysis = analyze_content_gemini(gemini_model, text)
-    return gemini_analysis
-def main():
-    print("Welcome to the News Classifier!")
-    print("Enter your news text below. Type 'Exit' to quit.")
-    while True:
-        news_text = input("\nEnter news text: ")
-        if news_text.lower() == 'exit':
-            print("Thank you for using the News Classifier!")
-            return
-        # Get ML prediction
-        prediction = predict_news(news_text)
-        print(f"\nML Analysis: {prediction}")
-        # Get Gemini analysis
-        print("\n=== Detailed Gemini Analysis ===")
-        gemini_result = get_gemini_analysis(news_text)
-        print(gemini_result)
 if __name__ == "__main__":
     main()

+# import torch
+# from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# import networkx as nx
+# import spacy
+# import pickle
+# import pandas as pd
+# import google.generativeai as genai
+# import json
+# # Load spaCy for NER
+# nlp = spacy.load("en_core_web_sm")
+# # Load the trained ML model
+# model_path = "./results/checkpoint-5030"  # Replace with the actual path to your model
+# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
+# model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# model.eval()
+# #########################
+# def setup_gemini():
+#     genai.configure(api_key=os.getenv("GEMINI_API"))  # literal key redacted: never commit credentials, even in comments
+#     model = genai.GenerativeModel('gemini-pro')
+#     return model
+# #########################
+# # Load the knowledge graph
+# graph_path = "./models/knowledge_graph.pkl"  # Replace with the actual path to your knowledge graph
+# with open(graph_path, 'rb') as f:
+#     graph_data = pickle.load(f)
+# knowledge_graph = nx.DiGraph()
+# knowledge_graph.add_nodes_from(graph_data['nodes'].items())
+# for u, edges in graph_data['edges'].items():
+#     for v, data in edges.items():
+#         knowledge_graph.add_edge(u, v, **data)
+# def predict_with_model(text):
+#     """Predict whether the news is real or fake using the ML model."""
+#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
+#     with torch.no_grad():
+#         outputs = model(**inputs)
+#     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+#     predicted_label = torch.argmax(probabilities, dim=-1).item()
+#     return "FAKE" if predicted_label == 1 else "REAL"
+# def update_knowledge_graph(text, is_real):
+#     """Update the knowledge graph with the new article."""
+#     entities = extract_entities(text)
+#     for entity, entity_type in entities:
+#         if not knowledge_graph.has_node(entity):
+#             knowledge_graph.add_node(
+#                 entity,
+#                 type=entity_type,
+#                 real_count=1 if is_real else 0,
+#                 fake_count=0 if is_real else 1
+#             )
+#         else:
+#             if is_real:
+#                 knowledge_graph.nodes[entity]['real_count'] += 1
+#             else:
+#                 knowledge_graph.nodes[entity]['fake_count'] += 1
+#     for i, (entity1, _) in enumerate(entities):
+#         for entity2, _ in entities[i+1:]:
+#             if not knowledge_graph.has_edge(entity1, entity2):
+#                 knowledge_graph.add_edge(
+#                     entity1,
+#                     entity2,
+#                     weight=1,
+#                     is_real=is_real
+#                 )
+#             else:
+#                 knowledge_graph[entity1][entity2]['weight'] += 1
+# def extract_entities(text):
+#     """Extract named entities from text using spaCy."""
+#     doc = nlp(text)
+#     entities = [(ent.text, ent.label_) for ent in doc.ents]
+#     return entities
+# def predict_with_knowledge_graph(text):
+#     """Predict whether the news is real or fake using the knowledge graph."""
+#     entities = extract_entities(text)
+#     real_score = 0
+#     fake_score = 0
+#     for entity, _ in entities:
+#         if knowledge_graph.has_node(entity):
+#             real_count = knowledge_graph.nodes[entity].get('real_count', 0)
+#             fake_count = knowledge_graph.nodes[entity].get('fake_count', 0)
+#             total = real_count + fake_count
+#             if total > 0:
+#                 real_score += real_count / total
+#                 fake_score += fake_count / total
+#     if real_score > fake_score:
+#         return "REAL"
+#     else:
+#         return "FAKE"
+# def predict_news(text):
+#     """Predict whether the news is real or fake using both the ML model and the knowledge graph."""
+#     # Predict with the ML model
+#     ml_prediction = predict_with_model(text)
+#     is_real = ml_prediction == "REAL"
+#     # Update the knowledge graph
+#     update_knowledge_graph(text, is_real)
+#     # Predict with the knowledge graph
+#     kg_prediction = predict_with_knowledge_graph(text)
+#     # Combine predictions (for simplicity, we use the ML model's prediction here)
+#     # You can enhance this by combining the scores from both predictions
+#     return ml_prediction if ml_prediction == kg_prediction else "UNCERTAIN"
+# #########################
+# # def analyze_content_gemini(model, text):
+# #     prompt = f"""Analyze this news text and provide results in the following JSON-like format:
+# #     TEXT: {text}
+# #     Please provide analysis in these specific sections:
+# #     1. GEMINI ANALYSIS:
+# #        - Predicted Classification: [Real/Fake]
+# #        - Confidence Score: [0-100%]
+# #        - Reasoning: [Key points for classification]
+# #     2. TEXT CLASSIFICATION:
+# #         - Content category/topic
+# #         - Writing style: [Formal/Informal/Clickbait]
+# #         - Target audience
+# #         - Content type: [news/opinion/editorial]
+# #     3. SENTIMENT ANALYSIS:
+# #        - Primary emotion
+# #        - Emotional intensity (1-10)
+# #        - Sensationalism Level: [High/Medium/Low]
+# #        - Bias Indicators: [List if any]
+# #        - Tone: (formal/informal), [Professional/Emotional/Neutral]
+# #        - Key emotional triggers
+# #     4. ENTITY RECOGNITION:
+# #         - Source Credibility: [High/Medium/Low]
+# #        - People mentioned
+# #        - Organizations
+# #        - Locations
+# #        - Dates/Time references
+# #        - Key numbers/statistics
+# #     5. CONTEXT EXTRACTION:
+# #        - Main narrative/story
+# #        - Supporting elements
+# #        - Key claims
+# #        - Narrative structure
+# #     6. FACT CHECKING:
+# #        - Verifiable Claims: [List main claims]
+# #        - Evidence Present: [Yes/No]
+# #        - Fact Check Score: [0-100%]
+# #     Format the response clearly with distinct sections."""
+# #     response = model.generate_content(prompt)
+# #     return response.text
+# def analyze_content_gemini(model, text):
+#     prompt = f"""Analyze this news text and return a JSON object with the following structure:
+#     {{
+#         "gemini_analysis": {{
+#             "predicted_classification": "Real or Fake",
+#             "confidence_score": "0-100",
+#             "reasoning": ["point1", "point2"]
+#         }},
+#         "text_classification": {{
+#             "category": "",
+#             "writing_style": "Formal/Informal/Clickbait",
+#             "target_audience": "",
+#             "content_type": "news/opinion/editorial"
+#         }},
+#         "sentiment_analysis": {{
+#             "primary_emotion": "",
+#             "emotional_intensity": "1-10",
+#             "sensationalism_level": "High/Medium/Low",
+#             "bias_indicators": ["bias1", "bias2"],
+#             "tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
+#             "emotional_triggers": ["trigger1", "trigger2"]
+#         }},
+#         "entity_recognition": {{
+#             "source_credibility": "High/Medium/Low",
+#             "people": ["person1", "person2"],
+#             "organizations": ["org1", "org2"],
+#             "locations": ["location1", "location2"],
+#             "dates": ["date1", "date2"],
+#             "statistics": ["stat1", "stat2"]
+#         }},
+#         "context": {{
+#             "main_narrative": "",
+#             "supporting_elements": ["element1", "element2"],
+#             "key_claims": ["claim1", "claim2"],
+#             "narrative_structure": ""
+#         }},
+#         "fact_checking": {{
+#             "verifiable_claims": ["claim1", "claim2"],
+#             "evidence_present": "Yes/No",
+#             "fact_check_score": "0-100"
+#         }}
+#     }}
+#     Analyze this text and return only the JSON response: {text}"""
+#     response = model.generate_content(prompt)
+#     # return json.loads(response.text)
+#     # Add error handling and response cleaning
+#     try:
+#         # Clean the response text to ensure it's valid JSON
+#         cleaned_text = response.text.strip()
+#         if cleaned_text.startswith('```json'):
+#             cleaned_text = cleaned_text[7:-3]  # Remove ```json and ``` markers
+#         return json.loads(cleaned_text)
+#     except json.JSONDecodeError:
+#         # Return a default structured response if JSON parsing fails
+#         return {
+#             "gemini_analysis": {
+#                 "predicted_classification": "UNCERTAIN",
+#                 "confidence_score": "50",
+#                 "reasoning": ["Analysis failed to generate valid JSON"]
+#             }
+#         }
+# def clean_gemini_output(text):
+#     """Remove markdown formatting from Gemini output"""
+#     text = text.replace('##', '')
+#     text = text.replace('**', '')
+#     return text
+# def get_gemini_analysis(text):
+#     """Get detailed content analysis from Gemini."""
+#     gemini_model = setup_gemini()
+#     gemini_analysis = analyze_content_gemini(gemini_model, text)
+#     # cleaned_analysis = clean_gemini_output(gemini_analysis)
+#     # return cleaned_analysis
+#     return gemini_analysis
+# #########################
+# def main():
+#     print("Welcome to the News Classifier!")
+#     print("Enter your news text below. Type 'Exit' to quit.")
+#     while True:
+#         news_text = input("\nEnter news text: ")
+#         if news_text.lower() == 'exit':
+#             print("Thank you for using the News Classifier!")
+#             return
+#         # First get ML and Knowledge Graph prediction
+#         prediction = predict_news(news_text)
+#         print(f"\nML and Knowledge Graph Analysis: {prediction}")
+#         # Then get Gemini analysis
+#         print("\n=== Detailed Gemini Analysis ===")
+#         gemini_result = get_gemini_analysis(news_text)
+#         print(gemini_result)
+# if __name__ == "__main__":
+#     main()
+import streamlit as st
 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, DebertaV2Tokenizer
+import networkx as nx
 import spacy
+import pickle
 import google.generativeai as genai
 import json
 import os
 import dotenv
+# Page config
+st.set_page_config(
+    page_title="Nexus NLP News Classifier",
+    page_icon="📰",
+    layout="wide"
+)
+# Load environment variables
 dotenv.load_dotenv()
+# Load models and resources
+@st.cache_resource
+def load_nlp():
+    """Load the spaCy ``en_core_web_sm`` pipeline (wrapped in ``st.cache_resource``)."""
+    pipeline = spacy.load("en_core_web_sm")
+    return pipeline
+@st.cache_resource
+def load_model():
+    """Load the DeBERTa-v3-small tokenizer and the classifier checkpoint.
+
+    Returns:
+        Tuple ``(tokenizer, model)``; the model has been put in eval mode.
+    """
+    checkpoint_dir = "./results/checkpoint-753"
+    tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
+    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
+    classifier.eval()
+    return tokenizer, classifier
+@st.cache_resource
+def load_knowledge_graph():
+    """Rebuild the directed knowledge graph from its pickled dump.
+
+    The pickle is read as a dict with a ``'nodes'`` mapping and an
+    ``'edges'`` mapping of ``source -> {target -> edge attribute dict}``.
+
+    Returns:
+        A ``networkx.DiGraph`` populated from that dump.
+    """
+    # NOTE(review): pickle.load can execute arbitrary code; keep this file trusted.
+    with open("./knowledge_graph_final.pkl", 'rb') as handle:
+        dump = pickle.load(handle)
+    graph = nx.DiGraph()
+    graph.add_nodes_from(dump['nodes'].items())
+    for source, targets in dump['edges'].items():
+        for target, attrs in targets.items():
+            graph.add_edge(source, target, **attrs)
+    return graph
 def setup_gemini():
+    """Configure the Gemini client and return the ``'gemini-pro'`` model.
+
+    The API key is read from the ``GEMINI_API`` environment variable.
+    """
     genai.configure(api_key=os.getenv("GEMINI_API"))
     model = genai.GenerativeModel('gemini-pro')
     return model
+# Initialize resources
+nlp = load_nlp()
+tokenizer, model = load_model()
+knowledge_graph = load_knowledge_graph()
 def predict_with_model(text):
+    """Classify ``text`` with the module-level tokenizer and model.
+
+    Returns:
+        Tuple ``(label, confidence)``: ``label`` is "FAKE" when the arg-max
+        class index is 1 and "REAL" otherwise; ``confidence`` is the softmax
+        probability of that class multiplied by 100.
+    """
+    # Input is truncated to at most 512 tokens.
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
     predicted_label = torch.argmax(probabilities, dim=-1).item()
+    confidence = probabilities[0][predicted_label].item() * 100
+    return "FAKE" if predicted_label == 1 else "REAL", confidence
 def extract_entities(text):
+    """Return a list of ``(entity_text, entity_label)`` tuples found by ``nlp`` in ``text``."""
     doc = nlp(text)
     entities = [(ent.text, ent.label_) for ent in doc.ents]
     return entities
+def update_knowledge_graph(text, is_real):
+    """Fold the entities of one classified article into ``knowledge_graph``.
+
+    Each entity becomes a node (or has its counter bumped), and each ordered
+    pair of entities from the text becomes an edge (or has its weight bumped).
+
+    Args:
+        text: Article text to extract entities from.
+        is_real: True when the article was classified as real.
+    """
+    entities = extract_entities(text)
+    counter_key = 'real_count' if is_real else 'fake_count'
+    for entity, entity_type in entities:
+        if not knowledge_graph.has_node(entity):
+            knowledge_graph.add_node(
+                entity,
+                type=entity_type,
+                real_count=1 if is_real else 0,
+                fake_count=0 if is_real else 1
+            )
+        else:
+            # Nodes loaded from the pickle may lack a counter; treat a missing
+            # one as 0, exactly as predict_with_knowledge_graph does when reading.
+            node_attrs = knowledge_graph.nodes[entity]
+            node_attrs[counter_key] = node_attrs.get(counter_key, 0) + 1
+    for i, (entity1, _) in enumerate(entities):
+        for entity2, _ in entities[i+1:]:
+            if not knowledge_graph.has_edge(entity1, entity2):
+                knowledge_graph.add_edge(
+                    entity1,
+                    entity2,
+                    weight=1,
+                    is_real=is_real
+                )
+            else:
+                knowledge_graph[entity1][entity2]['weight'] += 1
+def predict_with_knowledge_graph(text):
+    """Score ``text`` against the per-entity real/fake counters in the graph.
+
+    Returns:
+        Tuple ``(label, confidence)``. ``("UNCERTAIN", 50.0)`` when no known
+        entity contributes any score; otherwise "REAL" if the real score is
+        strictly larger, else "FAKE", with the winning share of the total
+        score expressed as a percentage.
+    """
+    real_score = 0
+    fake_score = 0
+    for name, _label in extract_entities(text):
+        if not knowledge_graph.has_node(name):
+            continue
+        attrs = knowledge_graph.nodes[name]
+        real_count = attrs.get('real_count', 0)
+        fake_count = attrs.get('fake_count', 0)
+        total = real_count + fake_count
+        if total > 0:
+            real_score += real_count / total
+            fake_score += fake_count / total
+    total_score = real_score + fake_score
+    if total_score == 0:
+        return "UNCERTAIN", 50.0
+    if real_score > fake_score:
+        verdict, winning_score = "REAL", real_score
+    else:
+        verdict, winning_score = "FAKE", fake_score
+    return verdict, (winning_score / total_score) * 100
 def analyze_content_gemini(model, text):
     prompt = f"""Analyze this news text and return a JSON object with the following structure:
             }
         }
+def main():
+    """Render the Streamlit page, or answer a ``?text=...`` query directly.
+
+    When the URL carries a ``text`` query parameter, only the ML prediction is
+    emitted as JSON and the regular UI is skipped. Otherwise the text area and
+    the three analysis columns are shown.
+    """
+    st.title("📰 Nexus NLP News Classifier")
+    st.write("Enter news text below to analyze its authenticity")
+    # Query parameters for API functionality
+    query_params = st.query_params
+    if "text" in query_params:
+        # st.query_params yields the value as a string; indexing it with [0]
+        # would classify only the first character of the submitted text.
+        text_input = query_params["text"]
+        ml_prediction, ml_confidence = predict_with_model(text_input)
+        st.json({"prediction": ml_prediction, "confidence": ml_confidence})
+        return
+    # Regular UI
+    news_text = st.text_area("News Text", height=200)
+    if st.button("Analyze"):
+        if news_text:
+            with st.spinner("Analyzing..."):
+                # Get all predictions
+                ml_prediction, ml_confidence = predict_with_model(news_text)
+                kg_prediction, kg_confidence = predict_with_knowledge_graph(news_text)
+                update_knowledge_graph(news_text, ml_prediction == "REAL")
+                gemini_model = setup_gemini()
+                gemini_result = analyze_content_gemini(gemini_model, news_text)
+                # Display results
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.subheader("ML Model Analysis")
+                    st.metric("Prediction", ml_prediction)
+                    st.metric("Confidence", f"{ml_confidence:.2f}%")
+                with col2:
+                    st.subheader("Knowledge Graph Analysis")
+                    st.metric("Prediction", kg_prediction)
+                    st.metric("Confidence", f"{kg_confidence:.2f}%")
+                with col3:
+                    st.subheader("Gemini Analysis")
+                    gemini_pred = gemini_result["gemini_analysis"]["predicted_classification"]
+                    gemini_conf = gemini_result["gemini_analysis"]["confidence_score"]
+                    st.metric("Prediction", gemini_pred)
+                    st.metric("Confidence", f"{gemini_conf}%")
+                with st.expander("View Detailed Analysis"):
+                    st.json(gemini_result)
+                with st.expander("Named Entities"):
+                    entities = extract_entities(news_text)
+                    st.write(entities)
+        else:
+            st.warning("Please enter some text to analyze")
 if __name__ == "__main__":
     main()

knowledge_graph_final.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f941e2c0b588a89f20e59aefd71c455696b291c88277672d997ea144164f70e8
+size 10584988

prev_final.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import spacy
+import google.generativeai as genai
+import json
+import os
+import dotenv
+dotenv.load_dotenv()
+# Load spaCy for NER
+# The pipeline is loaded once at import time and reused by extract_entities().
+nlp = spacy.load("en_core_web_sm")
+# Load the trained ML model
+# The classifier weights are read from a local checkpoint directory.
+model_path = "./results/checkpoint-753"  # Replace with the actual path to your model
+# Earlier tokenizer-loading variants, kept for reference only:
+# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
+# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
+from transformers import DebertaV2Tokenizer
+# The tokenizer is fetched by hub name ('microsoft/deberta-v3-small'), not from model_path.
+tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-small')
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# Put the model in evaluation mode; this script only runs predictions.
+model.eval()
+def setup_gemini():
+    """Configure the Gemini client and return a 'gemini-pro' model handle.
+
+    The API key is read from the ``GEMINI_API`` environment variable and
+    passed to ``genai.configure`` before the model object is created.
+    """
+    api_key = os.getenv("GEMINI_API")
+    genai.configure(api_key=api_key)
+    return genai.GenerativeModel('gemini-pro')
+def predict_with_model(text):
+    """Classify *text* as real or fake news with the loaded classifier.
+
+    The text is tokenized (truncated to at most 512 tokens), run through the
+    module-level ``model`` without gradient tracking, and the class with the
+    highest softmax probability is selected.
+
+    Returns ``"FAKE"`` when the selected class index is 1, otherwise ``"REAL"``.
+    """
+    encoded = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding=True,
+        max_length=512,
+    )
+    with torch.no_grad():
+        result = model(**encoded)
+    class_probs = torch.nn.functional.softmax(result.logits, dim=-1)
+    winner = torch.argmax(class_probs, dim=-1).item()
+    if winner == 1:
+        return "FAKE"
+    return "REAL"
+def extract_entities(text):
+    """Return the named entities spaCy finds in *text*.
+
+    Each entity is reported as a ``(entity_text, entity_label)`` tuple, in the
+    order given by ``doc.ents``.
+    """
+    found = []
+    for span in nlp(text).ents:
+        found.append((span.text, span.label_))
+    return found
+def predict_news(text):
+    """Return the ML classifier's verdict for *text*.
+
+    Thin wrapper that delegates directly to ``predict_with_model``.
+    """
+    return predict_with_model(text)
+def analyze_content_gemini(model, text):
+    """Ask Gemini for a structured JSON analysis of a news text.
+
+    Args:
+        model: Object exposing ``generate_content(prompt)`` whose result has a
+            ``text`` attribute (the handle returned by ``setup_gemini``).
+        text: The news text to analyze; it is appended to the prompt.
+
+    Returns:
+        The parsed JSON response. If the reply has no usable text or is not
+        valid JSON, a fallback dict with only a ``"gemini_analysis"`` section
+        (classification ``"UNCERTAIN"``, confidence ``"50"``) is returned.
+    """
+    prompt = f"""Analyze this news text and return a JSON object with the following structure:
+    {{
+        "gemini_analysis": {{
+            "predicted_classification": "Real or Fake",
+            "confidence_score": "0-100",
+            "reasoning": ["point1", "point2"]
+        }},
+        "text_classification": {{
+            "category": "",
+            "writing_style": "Formal/Informal/Clickbait",
+            "target_audience": "",
+            "content_type": "news/opinion/editorial"
+        }},
+        "sentiment_analysis": {{
+            "primary_emotion": "",
+            "emotional_intensity": "1-10",
+            "sensationalism_level": "High/Medium/Low",
+            "bias_indicators": ["bias1", "bias2"],
+            "tone": {{"formality": "formal/informal", "style": "Professional/Emotional/Neutral"}},
+            "emotional_triggers": ["trigger1", "trigger2"]
+        }},
+        "entity_recognition": {{
+            "source_credibility": "High/Medium/Low",
+            "people": ["person1", "person2"],
+            "organizations": ["org1", "org2"],
+            "locations": ["location1", "location2"],
+            "dates": ["date1", "date2"],
+            "statistics": ["stat1", "stat2"]
+        }},
+        "context": {{
+            "main_narrative": "",
+            "supporting_elements": ["element1", "element2"],
+            "key_claims": ["claim1", "claim2"],
+            "narrative_structure": ""
+        }},
+        "fact_checking": {{
+            "verifiable_claims": ["claim1", "claim2"],
+            "evidence_present": "Yes/No",
+            "fact_check_score": "0-100"
+        }}
+    }}
+    Analyze this text and return only the JSON response: {text}"""
+    response = model.generate_content(prompt)
+    try:
+        cleaned_text = response.text.strip()
+        # The model often wraps its answer in a markdown fence (``` or ```json).
+        # Strip the opening and closing fence independently so a missing
+        # closing fence no longer chops the last characters off valid JSON.
+        if cleaned_text.startswith('```'):
+            cleaned_text = cleaned_text[3:]
+            if cleaned_text[:4].lower() == 'json':
+                cleaned_text = cleaned_text[4:]
+            if cleaned_text.endswith('```'):
+                cleaned_text = cleaned_text[:-3]
+        return json.loads(cleaned_text)
+    except ValueError:
+        # json.JSONDecodeError is a ValueError subclass. Catching ValueError
+        # also covers the case where the `response.text` accessor itself
+        # raises because the reply carries no text (e.g. a blocked response).
+        return {
+            "gemini_analysis": {
+                "predicted_classification": "UNCERTAIN",
+                "confidence_score": "50",
+                "reasoning": ["Analysis failed to generate valid JSON"]
+            }
+        }
+def clean_gemini_output(text):
+    """Strip markdown heading (``##``) and bold (``**``) markers from *text*.
+
+    Markers are removed in that order; all other characters are kept as is.
+    """
+    return text.replace('##', '').replace('**', '')
+def get_gemini_analysis(text):
+    """Run the Gemini content analysis for *text* and return its result.
+
+    A Gemini model handle is created via ``setup_gemini`` on every call and
+    handed to ``analyze_content_gemini`` together with the text.
+    """
+    return analyze_content_gemini(setup_gemini(), text)
+def main():
+    """Run the interactive console loop of the news classifier.
+
+    Repeatedly prompts for a news text, prints the ML prediction followed by
+    the Gemini analysis, and stops when the user types ``exit`` (any case)
+    or when standard input is closed.
+    """
+    print("Welcome to the News Classifier!")
+    print("Enter your news text below. Type 'Exit' to quit.")
+    while True:
+        try:
+            # Strip so that " exit " or a trailing newline artifact still matches.
+            news_text = input("\nEnter news text: ").strip()
+        except EOFError:
+            # stdin was closed (piped input exhausted or Ctrl-D): leave cleanly
+            # instead of crashing with a traceback.
+            print("\nThank you for using the News Classifier!")
+            return
+        if news_text.lower() == 'exit':
+            print("Thank you for using the News Classifier!")
+            return
+        if not news_text:
+            # Nothing to classify; avoid a pointless model and API call.
+            print("Please enter some text to analyze.")
+            continue
+        # Get ML prediction
+        prediction = predict_news(news_text)
+        print(f"\nML Analysis: {prediction}")
+        # Get Gemini analysis
+        print("\n=== Detailed Gemini Analysis ===")
+        gemini_result = get_gemini_analysis(news_text)
+        print(gemini_result)
+if __name__ == "__main__":
+    main()

test.py CHANGED Viewed

@@ -1,14 +1,51 @@
-import requests
-# Replace with your actual Hugging Face Spaces URL
-SPACE_API_URL = "https://your-username-your-app.hf.space/?text=Breaking: Stock market crashes!"
-# Send request to Streamlit API
-response = requests.get(SPACE_API_URL)
-# Parse JSON response
-if response.status_code == 200:
-    result = response.json()
-    print(f"Prediction: {result['prediction']} (Confidence: {result['confidence']*100:.2f}%)")
-else:
-    print("Error: Could not get prediction")

+# import requests
+# import json
+# # Replace with your actual Hugging Face Spaces URL
+# SPACE_API_URL = "https://heheboi0769-nexus-nlp-model.hf.space//?text=Breaking: Stock market crashes!"
+# # Add the text as a query parameter since the app uses st.experimental_get_query_params()
+# text = "Breaking: Stock market crashes!"
+# url_with_params = f"{SPACE_API_URL}?text={text}"
+# # Send request to Streamlit API
+# response = requests.get(url_with_params)
+# # Parse JSON response
+# if response.status_code == 200:
+#     result = response.json()
+#     print(f"Prediction: {result['prediction']} (Confidence: {result['confidence']*100:.2f}%)")
+# else:
+#     print("Error: Could not get prediction")
+import requests
+import urllib.parse
+def test_model(timeout=30):
+    """Send one sample headline to the deployed Space and print the reply.
+
+    Args:
+        timeout: Seconds to wait for the HTTP request before giving up.
+            Without a timeout ``requests`` can block indefinitely.
+
+    Prints the status code and raw body for debugging. On a 200 response the
+    prediction and confidence are printed too, provided the body is JSON with
+    the expected ``prediction`` and ``confidence`` keys.
+    """
+    # Base URL for your Streamlit app
+    base_url = "https://heheboi0769-nexus-nlp-model.hf.space/api"
+    # Test text
+    text = "Breaking: Stock market crashes!"
+    # Make request to the Streamlit app's API endpoint
+    response = requests.post(
+        f"{base_url}/predict",
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": "Bearer your_api_key_here"
+        },
+        json={"text": text},
+        timeout=timeout,
+    )
+    # Print response for debugging
+    print(f"Status Code: {response.status_code}")
+    print(f"Response: {response.text}")
+    if response.status_code != 200:
+        return
+    try:
+        result = response.json()
+        prediction = result['prediction']
+        confidence = result['confidence']
+    except (ValueError, KeyError, TypeError) as exc:
+        # A 200 reply can still be an HTML page or JSON of a different shape.
+        print(f"Unexpected response payload: {exc!r}")
+        return
+    print(f"Prediction: {prediction}")
+    print(f"Confidence: {confidence*100:.2f}%")
+if __name__ == "__main__":
+    test_model()