# --- Page-extraction residue (not part of the program), kept as comments ---
# Spaces:
# Runtime error
# Runtime error
# File size: 5,682 Bytes
# ff484e8 1df2fec 3bae845 20877cb 3bae845 20877cb 3675b89 20877cb 3bae845 20877cb 3bae845 20877cb 3bae845 cdbf43b 8e9b18f cdbf43b 3bae845 ff484e8 cdbf43b 3675b89 ff484e8 c980c20 ff484e8 3bae845 ff484e8 3bae845 ff484e8 3675b89 ff484e8
# (line-number gutter "1 2 3 ... 141" from the extracted page omitted)
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import torch
from torch import nn
from torch.autograd import Variable
# GAN-based anomaly detection for financial analysis
class GANRiskAnalyzer:
    """Small fully-connected GAN: a generator/discriminator pair plus optimizers.

    The generator maps noise vectors of width ``input_dim`` to samples of
    width ``output_dim`` (Tanh output, so values lie in [-1, 1]).  The
    discriminator maps samples of width ``output_dim`` to a probability
    (Sigmoid output) and is trained with binary cross-entropy.

    Args:
        input_dim: Width of the noise vector fed to the generator.
        hidden_dim: Width of the single hidden layer in both networks.
        output_dim: Width of generated samples; must equal the number of
            columns of the data passed to :meth:`train`.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        # Remembered so train()/generate() always draw noise that matches the
        # generator's first layer, even when input_dim != output_dim.
        self.input_dim = input_dim
        self.generator = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Tanh(),
        )
        self.discriminator = nn.Sequential(
            nn.Linear(output_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )
        self.loss = nn.BCELoss()
        self.generator_optimizer = torch.optim.Adam(
            self.generator.parameters(), lr=0.0002
        )
        self.discriminator_optimizer = torch.optim.Adam(
            self.discriminator.parameters(), lr=0.0002
        )

    def train(self, data, epochs=100):
        """Run ``epochs`` full-batch adversarial updates on ``data``.

        Args:
            data: 2-D tensor (or array-like convertible to one) with
                ``output_dim`` columns; the whole table is used as one batch.
            epochs: Number of discriminator+generator update rounds.
        """
        data = torch.as_tensor(data, dtype=torch.float32)
        batch_size = data.size(0)
        real_labels = torch.ones(batch_size, 1)
        fake_labels = torch.zeros(batch_size, 1)

        for _ in range(epochs):
            # --- Discriminator step: real rows -> 1, generated rows -> 0 ---
            self.discriminator_optimizer.zero_grad()
            real_loss = self.loss(self.discriminator(data), real_labels)

            # Noise width is the generator's input width, not the data width.
            z = torch.randn(batch_size, self.input_dim)
            fake_data = self.generator(z)
            # detach(): the discriminator update must not backprop into the
            # generator.
            fake_loss = self.loss(
                self.discriminator(fake_data.detach()), fake_labels
            )

            d_loss = real_loss + fake_loss
            d_loss.backward()
            self.discriminator_optimizer.step()

            # --- Generator step: try to make the discriminator answer 1 ---
            self.generator_optimizer.zero_grad()
            g_loss = self.loss(self.discriminator(fake_data), real_labels)
            g_loss.backward()
            self.generator_optimizer.step()

    def generate(self, n_samples, input_dim=None):
        """Return ``n_samples`` generated rows as a NumPy array.

        Args:
            n_samples: Number of rows to generate.
            input_dim: Noise width; defaults to the width given at
                construction time. Kept as a parameter for existing callers.

        Returns:
            ``numpy.ndarray`` of shape ``(n_samples, output_dim)``.
        """
        if input_dim is None:
            input_dim = self.input_dim
        # Sampling only: no gradients are needed.
        with torch.no_grad():
            z = torch.randn(n_samples, input_dim)
            generated_data = self.generator(z)
        return generated_data.detach().numpy()
def _read_uploaded_table(file):
    """Load the upload into a DataFrame, picking the reader from the extension."""
    # Accept both file-like objects exposing ``.name`` and plain path strings.
    path = getattr(file, "name", file)
    if str(path).lower().endswith('.xlsx'):
        return pd.read_excel(path)
    return pd.read_csv(path, encoding='utf-8', on_bad_lines='skip')


def _encode_target(y):
    """Turn the label column into discrete integer classes."""
    if pd.api.types.is_bool_dtype(y) or not pd.api.types.is_numeric_dtype(y):
        # Text / boolean / categorical labels -> integer codes.
        return y.astype('category').cat.codes
    if pd.api.types.is_integer_dtype(y):
        return y
    # Continuous target: bucket into (up to) 5 quantile bins.  Repeated
    # quantile edges are merged instead of raising.
    return pd.qcut(y, q=5, labels=False, duplicates='drop')


def _fit_and_score(X, y):
    """Scale, PCA-project, fit gradient boosting; return (accuracy, X_pca)."""
    X_scaled = StandardScaler().fit_transform(X)
    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(2, X_scaled.shape[0], X_scaled.shape[1])
    X_pca = PCA(n_components=n_components).fit_transform(X_scaled)

    X_train, X_test, y_train, y_test = train_test_split(
        X_pca, y, test_size=0.2, random_state=42
    )
    model = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.1, max_depth=5
    )
    model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test))
    return accuracy, X_pca


def analyze_financial_data(file):
    """Analyze an uploaded CSV/XLSX table and return a short insights text.

    The last column is treated as the label; every other column is a feature.
    Rows with any missing value are dropped, features are one-hot encoded,
    standardized and PCA-projected, a gradient-boosting classifier is fit and
    scored on a 20% hold-out split, and a small GAN is trained on the
    projected features.

    Args:
        file: The uploaded file; either an object with a ``.name`` path
            attribute or a path string.

    Returns:
        The insights string on success, or ``{"error": <message>}`` when the
        file cannot be read or the data cannot be analyzed.
    """
    if file is None:
        return {"error": "Failed to read file: no file was uploaded."}

    try:
        # Read the uploaded Excel or CSV file
        data = _read_uploaded_table(file)
    except Exception as e:  # boundary: any reader failure becomes a message
        return {"error": f"Failed to read file: {str(e)}"}

    if data.empty:
        return {"error": "The uploaded file is empty or has an invalid structure."}

    # Need at least one feature column plus the trailing label column.
    if data.shape[1] < 2:
        return {"error": "Invalid data format. Please ensure the last column contains labels."}

    # Drop incomplete rows from the whole frame so features and labels stay
    # row-aligned (cleaning them separately can leave different lengths).
    data = data.dropna()
    if data.empty:
        return {"error": "The data contains missing values or invalid rows after cleaning."}

    # Positional selection keeps working when column names are duplicated.
    X = pd.get_dummies(data.iloc[:, :-1], drop_first=True)  # one-hot encode categoricals

    try:
        y = _encode_target(data.iloc[:, -1])
        accuracy, X_pca = _fit_and_score(X, y)
    except ValueError as e:
        # Raised for e.g. too few rows, a single class, or no usable features.
        return {"error": f"Analysis failed: {str(e)}"}

    # GAN-based Anomaly Detection
    # NOTE(review): the generated samples are not used in the insights text.
    gan = GANRiskAnalyzer(input_dim=X_pca.shape[1], hidden_dim=128, output_dim=X_pca.shape[1])
    gan.train(torch.tensor(X_pca, dtype=torch.float32), epochs=200)
    anomalies = gan.generate(n_samples=5, input_dim=X_pca.shape[1])

    insights = f"The analysis reveals an accuracy of {accuracy * 100:.2f}%. "
    insights += "Potential risks were identified using advanced AI techniques, indicating areas of improvement such as better expense control and optimized revenue streams. "
    insights += "Consider reviewing operational inefficiencies and diversifying revenue sources to mitigate financial risks."
    return insights
# Gradio Interface
# Two-column layout: upload + button on the left, text result on the right.
with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
    gr.Markdown("# **AI Risk Analyst Agent**")
    gr.Markdown("Analyze your financial risks and identify anomalies using AI models.")
    with gr.Row():
        with gr.Column():
            data_file = gr.File(label="Upload Financial Data (CSV/XLSX)", file_types=[".csv", ".xlsx"])
            submit_button = gr.Button("Analyze")
        with gr.Column():
            output = gr.Textbox(label="Risk Analysis Insights")
    # Event wiring is registered while the Blocks context is still open.
    submit_button.click(analyze_financial_data, inputs=data_file, outputs=output)

# Start the web app (the stray trailing "|" from the page extraction is removed).
interface.launch()