File size: 10,558 Bytes
b9a2f1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
import streamlit as st
import requests
import hashlib
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from io import BytesIO
from PIL import Image
from pefile import PE, PEFormatError
import os
import re

# VirusTotal API details
# SECURITY(review): this API key used to be hard-coded in source. It now
# prefers the VIRUSTOTAL_API_KEY environment variable; the original literal
# is kept only as a fallback so existing deployments keep working.
# TODO: rotate the exposed key and remove the fallback.
VIRUSTOTAL_API_KEY = os.environ.get(
    'VIRUSTOTAL_API_KEY',
    'ed48e6407e0b7975be7d19c797e1217f500183c9ae84d1119af8628ba4c98c3d',
)

# streamlit framework -- must run before any other st.* call
st.set_page_config(
    page_title="OxThreat",
    page_icon="πŸ”",
    layout="wide"
)

# Function to calculate the file's SHA-256 hash
def get_file_hash(file):
    """Return the SHA-256 hex digest of a file-like object.

    The stream is hashed in 64 KiB chunks and the read pointer is left
    at offset 0 so callers can re-read the file afterwards.
    """
    hasher = hashlib.sha256()
    file.seek(0)
    for chunk in iter(lambda: file.read(65536), b""):
        hasher.update(chunk)
    file.seek(0)  # leave the stream rewound for the next consumer
    return hasher.hexdigest()

# Function to analyze the file using VirusTotal
def virustotal_analysis(file_hash):
    """Fetch the VirusTotal v3 report for *file_hash* (SHA-256 hex string).

    Returns the parsed JSON report dict on HTTP 200; otherwise shows a
    Streamlit error and returns None.

    Fixes over the original:
    - a request timeout is set so a slow or unreachable API cannot hang
      the app indefinitely (the original call had no timeout);
    - network-level failures (DNS, connection reset, timeout) are caught
      instead of crashing the script run.
    """
    url = f"https://www.virustotal.com/api/v3/files/{file_hash}"
    headers = {"x-apikey": VIRUSTOTAL_API_KEY}
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        st.error("Error with VirusTotal API request. Please check your API key or the file hash.")
        return None
    if response.status_code == 200:
        return response.json()
    st.error("Error with VirusTotal API request. Please check your API key or the file hash.")
    return None

# Function to extract metadata from PE files
def extract_metadata(file):
    """Parse a Portable Executable from a file-like object and return a
    small dict of FILE_HEADER fields, or None (with a UI error) when the
    bytes are not a valid PE image.
    """
    try:
        parsed = PE(data=file.read())
    except PEFormatError:
        st.error("Uploaded file is not a valid PE format.")
        return None
    return {
        "Number of Sections": parsed.FILE_HEADER.NumberOfSections,
        "Time Date Stamp": parsed.FILE_HEADER.TimeDateStamp,
        "Characteristics": parsed.FILE_HEADER.Characteristics,
    }

# Function to analyze log files
def analyze_log_file(log_content):
    """Collect every 'ERROR...' line tail from *log_content* into a
    one-column DataFrame ("Errors").

    NOTE(review): a second, richer ``analyze_log_file`` is defined further
    down this file and shadows this one once module execution reaches it.
    """
    error_lines = re.findall(r'ERROR.*', log_content)
    return pd.DataFrame(error_lines, columns=["Errors"])

# Function to create charts from VirusTotal results
def create_virus_total_charts(virus_total_results):
    """Build a bar chart of ``last_analysis_stats`` from a VirusTotal
    report dict; returns the matplotlib Figure, or None for empty input.
    """
    if not virus_total_results:
        return None

    stats = virus_total_results['data']['attributes']['last_analysis_stats']

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=list(stats.keys()), y=list(stats.values()), palette="viridis", ax=ax)
    ax.set_title("VirusTotal Analysis Results", fontsize=16, fontweight='bold')
    ax.set_xlabel("Analysis Types", fontsize=14)
    ax.set_ylabel("Count", fontsize=14)
    return fig

# Function to create detailed tables from JSON data
def create_detailed_table(data, title):
    """Render *data* (a JSON-like mapping) as a styled DataFrame under a
    markdown heading. Also echoes the raw payload for debugging.
    """
    st.write(f"### {title}")

    frame = pd.json_normalize(data)

    # Debug: show the raw payload alongside the table
    st.write("Raw Data:", data)

    if frame.empty:
        st.write("No data available.")
        return

    styled = (
        frame.style
        .background_gradient(cmap='viridis')
        .format(na_rep='N/A', precision=2)
    )
    st.dataframe(styled)

# Function to display the analysis results on the dashboard
def display_analysis_results(metadata, virus_total_results, log_analysis=None):
    """Lay out PE metadata and VirusTotal results in the wide column and
    the log analysis table in the narrow one. Sections whose input is
    missing are simply skipped.
    """
    st.write("## Analysis Results")

    wide, narrow = st.columns([2, 1])

    with wide:
        # PE metadata section
        if metadata:
            st.write("### πŸ“‚ PE File Metadata")
            create_detailed_table(metadata, "PE File Metadata")
        # VirusTotal section: table plus stats chart
        if virus_total_results:
            st.write("### 🦠 VirusTotal Results")
            create_detailed_table(virus_total_results['data'], "VirusTotal Results")
            st.write("#### πŸ“Š VirusTotal Analysis Stats")
            chart = create_virus_total_charts(virus_total_results)
            if chart:
                st.pyplot(chart)

    with narrow:
        if log_analysis is not None:
            st.write("### πŸ“ Log Analysis")
            st.table(log_analysis)

# Main page of the Streamlit app
def main_page():
    """Landing page: branding plus a button that navigates to the
    file-analysis page via session state.
    """
    st.title("🦠 Malware Analysis Tool")
    st.markdown("---")
    st.image('ui/antivirus.png', width=200, use_column_width='always')

    clicked = st.button("Go to File Analysis πŸ—‚οΈ")
    if clicked:
        st.session_state["page"] = "file_analysis"
        st.experimental_rerun()

# File analysis page where the user can upload files for analysis
def file_analysis_page():
    """Upload page: hash the uploaded file, dispatch on its extension for
    a preview or deeper analysis, then render the combined results.
    """
    st.title("πŸ” File Analysis Dashboard")
    st.markdown("---")
    st.image('ui/antivirus.png', width=80, use_column_width='none')

    uploaded_file = st.file_uploader("Upload any file for analysis", type=["exe", "dll", "log", "pdf", "png", "jpg", "jpeg", "gif", "txt", "zip", "rar", "apk"])
    if not uploaded_file:
        return

    file_hash = get_file_hash(uploaded_file)
    st.write(f"SHA-256 Hash: {file_hash}")

    file_extension = uploaded_file.name.split('.')[-1].lower()

    # Defaults for every branch; only the PE/APK and log branches fill these in.
    metadata = None
    virus_total_results = None
    log_analysis = None

    if file_extension in ('png', 'jpg', 'jpeg', 'gif'):
        st.write("### πŸ“„ Image Preview")
        preview = Image.open(uploaded_file)
        preview.thumbnail((150, 150))  # shrink for an inline preview
        st.image(preview, caption='Uploaded Image', use_column_width=True)

    elif file_extension == 'pdf':
        st.write("### πŸ“„ PDF File")
        st.write("PDF preview is not supported. Please use other tools to view.")
        st.download_button(label="Download PDF", data=uploaded_file, file_name=uploaded_file.name)

    elif file_extension in ('txt', 'log'):
        st.write("### πŸ“ Log File Content")
        text = uploaded_file.getvalue().decode("utf-8")
        log_analysis = analyze_log_file(text)

    elif file_extension in ('zip', 'rar'):
        st.write("### πŸ“¦ Compressed File")
        st.write("Compressed files require further extraction and analysis.")

    elif file_extension in ('apk', 'exe', 'dll'):
        # Persist the upload to disk so pefile can parse it from a path-backed
        # handle; the temp copy is always removed afterwards.
        temp_path = f"./temp/{uploaded_file.name}"
        os.makedirs(os.path.dirname(temp_path), exist_ok=True)
        with open(temp_path, "wb") as sink:
            sink.write(uploaded_file.getbuffer())
        try:
            with open(temp_path, "rb") as saved:
                file_hash = get_file_hash(saved)
                metadata = extract_metadata(saved)
                virus_total_results = virustotal_analysis(file_hash)
        finally:
            os.remove(temp_path)

    else:
        st.error("Unsupported file type.")

    display_analysis_results(metadata, virus_total_results, log_analysis)

# Initialize session state for page navigation
if 'page' not in st.session_state:
    st.session_state.page = "main"

# Routing based on page state: dispatch table instead of an if/elif chain.
_PAGES = {
    "main": main_page,
    "file_analysis": file_analysis_page,
}
_render = _PAGES.get(st.session_state.page)
if _render:
    _render()







def analyze_log_file(log_content):
    """Scan raw log text and bucket interesting artifacts into DataFrames.

    Returns a dict with five entries:
    ``log_dataframe`` (every line), ``ip_dataframe``, ``domain_dataframe``,
    ``header_dataframe`` (name/value pairs for User-Agent, Content-Type,
    Authorization), and ``session_dataframe`` (SessionID values).

    NOTE(review): this definition shadows the earlier, simpler
    ``analyze_log_file`` defined above in this file.
    """
    # Compiled patterns for each artifact class
    ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
    domain_pattern = re.compile(r'\b[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b')
    header_pattern = re.compile(r'(User-Agent|Content-Type|Authorization):\s*(.*)', re.IGNORECASE)
    session_pattern = re.compile(r'SessionID:\s*([a-zA-Z0-9]+)')

    lines = log_content.splitlines()

    ips, domains, headers, sessions = [], [], [], []
    for line in lines:
        ips += ip_pattern.findall(line)
        domains += domain_pattern.findall(line)
        headers += header_pattern.findall(line)   # (name, value) tuples
        sessions += session_pattern.findall(line)

    # Summary of findings, one DataFrame per artifact class
    return {
        "log_dataframe": pd.DataFrame(lines, columns=["Log Entries"]),
        "ip_dataframe": pd.DataFrame(ips, columns=["IP Addresses"]),
        "domain_dataframe": pd.DataFrame(domains, columns=["Domains"]),
        "header_dataframe": pd.DataFrame(headers, columns=["Header Name", "Header Value"]),
        "session_dataframe": pd.DataFrame(sessions, columns=["Session IDs"]),
    }

# Log Analysis Section
# BUG FIX: this block previously executed at module import time and referenced
# a name `log_analysis` that is never defined at module level, so every script
# run crashed with NameError. It is now a function; call it with the summary
# dict produced by the dict-returning analyze_log_file() above.
def render_log_analysis(log_analysis):
    """Render the log-analysis summary dict as a two-row dashboard section.

    Parameters
    ----------
    log_analysis : dict | None
        Mapping with keys "ip_dataframe", "domain_dataframe",
        "log_dataframe", "session_dataframe", "header_dataframe",
        each holding a pandas DataFrame. Nothing is rendered when None.
    """
    if log_analysis is None:
        return

    st.write("### πŸ“ Log Analysis")

    # First row: IP Addresses and Domains
    col1, col2 = st.columns(2)

    with col1:
        st.write("**IP Addresses:**")
        st.dataframe(log_analysis.get("ip_dataframe"))

    with col2:
        st.write("**Domains:**")
        st.dataframe(log_analysis.get("domain_dataframe"))

    # Second row: Log Entries, Session IDs, Headers
    col3, col4, col5 = st.columns([2, 1, 1])

    with col3:
        st.write("**Log Entries:**")
        st.dataframe(log_analysis.get("log_dataframe"))

    with col4:
        st.write("**Session IDs:**")
        if not log_analysis.get("session_dataframe").empty:
            st.dataframe(log_analysis.get("session_dataframe"))
        else:
            st.write("No session IDs found.")

    with col5:
        st.write("**Headers:**")
        if not log_analysis.get("header_dataframe").empty:
            st.dataframe(log_analysis.get("header_dataframe"))
        else:
            st.write("No headers found.")