import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# Page-level settings: full-width layout plus the browser-tab icon and title.
st.set_page_config(
    layout="wide",
    page_icon="🔍",
    page_title="DiverseVul Code Vulnerability Classifier",
)

# Example code snippets loaded by the "Try an Example" buttons.
# Each snippet is written one list entry per line of C so that blank lines,
# trailing spaces and leading tabs are visible in the source.
VULNERABLE_EXAMPLE = "\n".join([
    "static int cirrus_bitblt_videotovideo_patterncopy(CirrusVGAState * s)",
    "{",
    "    ",
    "return cirrus_bitblt_common_patterncopy(s,",
    "\t\t\t\t\t    s->vram_ptr +",
    " " * 44 + "(s->cirrus_blt_srcaddr & ~7));",
    "}",
])

NON_VULNERABLE_EXAMPLE = "\n".join([
    "static void loongarch_cpu_synchronize_from_tb(CPUState *cs,",
    "",
    " const TranslationBlock *tb)",
    "{",
    "    LoongArchCPU *cpu = LOONGARCH_CPU(cs);",
    "    CPULoongArchState *env = &cpu->env;",
    "",
    "    env->pc = tb->pc;",
    "}",
])

def classify_code_sample(code_sample, model, tokenizer, device, max_length=512):
    """Classify a single code sample and return its class probabilities.

    Args:
        code_sample: Source code text to classify.
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing a ``logits`` tensor.
        tokenizer: Callable that converts ``code_sample`` into model inputs;
            its result must support ``.to(device)``.
        device: Device the inputs are moved to before the forward pass.
        max_length: Token budget passed to the tokenizer; inputs are
            truncated and padded to this length.

    Returns:
        A 1-D NumPy array holding one softmax probability per class.
    """
    inputs = tokenizer(
        code_sample,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="pt"
    ).to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Drop only the batch axis.  A bare ``squeeze()`` would also collapse the
    # class axis when the model emits a single logit, producing a 0-d array
    # that callers cannot index.
    return F.softmax(logits, dim=-1).squeeze(0).cpu().numpy()

@st.cache_resource(show_spinner=False)
def _load_model(model_name):
    """Load the tokenizer and classifier for ``model_name`` once and cache them.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be reloaded on each button click.

    Returns:
        A ``(model, tokenizer, device)`` tuple; the model is already moved to
        ``device`` and switched to evaluation mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    return model, tokenizer, device


def _render_sidebar():
    """Draw the static About / Examples / How to Use / Limitations sidebar."""
    with st.sidebar:
        st.header("About")
        st.write("""
        This tool uses a machine learning model trained on the DiverseVul dataset, which includes:
        - 18,945 vulnerable functions
        - 330,492 non-vulnerable functions
        - 150 different CWE types
        - Code from thousands of real-world projects
        """)

        st.subheader("Example Code Explanation")
        # Describes the two C snippets the example buttons actually load.
        st.write("""
        The vulnerable example is a C function that:
        - Offsets `s->vram_ptr` by a masked source address
        - Passes the resulting pointer to a helper with no visible bounds check

        The non-vulnerable example is a short C function that:
        - Copies the program counter from a translation block into the CPU state
        - Performs no pointer arithmetic or buffer access
        """)

        st.subheader("How to Use")
        st.write("""
        1. Click an example button or paste your code
        2. Click 'Analyze Code'
        3. Review the results and probability scores
        4. Consider all flagged issues in context
        5. Verify findings with manual review
        """)

        st.subheader("Limitations")
        st.write("""
        - The model may not catch all vulnerabilities
        - Some safe code might be flagged as vulnerable
        - Results should be verified by domain experts
        - Performance varies across different CWE types
        - Best used as part of a comprehensive code review process
        """)


def _render_results(probabilities):
    """Show the prediction, its confidence, the per-class chart and notes.

    Args:
        probabilities: 1-D array with the probability of "Non-vulnerable" at
            index 0 and "Vulnerable" at index 1.
    """
    st.subheader("Analysis Results")

    class_names = ["Non-vulnerable", "Vulnerable"]
    predicted_class_index = int(probabilities.argmax())
    predicted_class = class_names[predicted_class_index]
    confidence = probabilities[predicted_class_index] * 100

    # Prediction and confidence side by side.
    col1, col2 = st.columns(2)
    with col1:
        st.metric(
            "Prediction",
            predicted_class,
            help="The model's classification of the code"
        )
    with col2:
        st.metric(
            "Confidence",
            f"{confidence:.1f}%",
            help="How confident the model is in its prediction"
        )

    # Per-class probabilities as a bar chart.
    results_df = pd.DataFrame({
        'Class': class_names,
        'Probability': probabilities
    })
    st.subheader("Detailed Probabilities")
    st.bar_chart(
        results_df.set_index('Class')['Probability']
    )

    # Review checklist, shown only for a positive prediction.
    if predicted_class == "Vulnerable":
        st.warning("""
            ⚠️ This code has been flagged as potentially vulnerable. 
            Please review it carefully for various types of vulnerabilities including:
            
            Security:
            - Input validation
            - Authentication issues
            - Access control problems
            
            Implementation:
            - Memory management
            - Resource handling
            - Error handling
            
            Design:
            - Concurrency issues
            - Logic errors
            - Performance problems
            
            Best Practices:
            - Code structure
            - Error handling patterns
            - Resource cleanup
        """)

    st.info("""
        Note: This tool is trained on the DiverseVul dataset, which covers 150 different 
        types of Common Weakness Enumeration (CWE) categories. While comprehensive, it 
        should be used as part of a larger code review process. False positives and 
        negatives are possible.
    """)


def main():
    """Render the classifier page: intro, model load, code input and results."""
    st.title("DiverseVul Code Vulnerability Classifier")
    st.write("""
    This tool analyzes code snippets for various types of vulnerabilities, including but not limited to:
    - Security vulnerabilities (e.g., buffer overflows, injection flaws)
    - Memory management issues
    - Concurrency problems
    - Resource leaks
    - Logic errors
    - Performance issues
    - Reliability problems
    """)

    # Draw the sidebar first so the early returns below cannot skip it.
    _render_sidebar()

    # Load model and tokenizer (cached across reruns by _load_model).
    try:
        with st.spinner("Loading model..."):
            model, tokenizer, device = _load_model("moazx/Code-Vulnerability-Classifier_app")
        st.success("Model loaded successfully!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return

    # Example buttons.  They write to the text area's session-state key
    # *before* the widget is created in this run, so a click always replaces
    # the current text, even after the user has edited a loaded example.
    st.subheader("Try an Example")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("📋 Load Vulnerable Example"):
            st.session_state['code_input'] = VULNERABLE_EXAMPLE
    with col2:
        if st.button("📋 Load Non-Vulnerable Example"):
            st.session_state['code_input'] = NON_VULNERABLE_EXAMPLE

    # Input area, bound to session state through its key.
    st.subheader("Input Code")
    code_input = st.text_area(
        "Enter your code snippet here:",
        key='code_input',
        height=300,
        help="Paste your code here for comprehensive vulnerability analysis"
    )

    # Analysis button
    if st.button("Analyze Code"):
        if not code_input.strip():
            st.warning("Please enter some code to analyze.")
            return

        with st.spinner("Analyzing code..."):
            # Top-level UI boundary: report any failure in the page instead
            # of letting the run abort.
            try:
                probabilities = classify_code_sample(code_input, model, tokenizer, device)
                _render_results(probabilities)
            except Exception as e:
                st.error(f"Error during analysis: {str(e)}")

# Script entry point: build the page only when this file is executed as the
# main script, not when it is imported as a module.
if __name__ == "__main__":
    main()