Canstralian committed on
Commit
2ce6627
·
verified ·
1 Parent(s): f922827

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -3
app.py CHANGED
@@ -8,13 +8,22 @@ from src.fine_tune_helpers import fine_tune_model
8
  st.title("OSINT Tool 🏢")
9
  st.markdown("""
10
  This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
11
- It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
12
  """)
13
-
14
  # Sidebar for navigation
15
  st.sidebar.title("Navigation")
16
  app_mode = st.sidebar.radio("Choose the mode", ["GitHub Repository Analysis", "URL Title Fetcher", "Dataset Upload & Fine-Tuning"])
17
 
 
 
 
 
 
 
 
 
 
18
  # GitHub Repository Analysis
19
  if app_mode == "GitHub Repository Analysis":
20
  st.header("GitHub Repository Analysis")
@@ -52,7 +61,50 @@ elif app_mode == "URL Title Fetcher":
52
  elif app_mode == "Dataset Upload & Fine-Tuning":
53
  st.header("Dataset Upload & Fine-Tuning")
54
 
 
 
 
 
55
  uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")
56
 
57
  if uploaded_file is not None:
58
- fine_tune_model(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  st.title("OSINT Tool 🏢")
9
  st.markdown("""
10
  This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
11
+ It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**, **Code Summarization**, **Bug Fixing**, and more.
12
  """)
13
+
14
  # Sidebar for navigation
15
  st.sidebar.title("Navigation")
16
  app_mode = st.sidebar.radio("Choose the mode", ["GitHub Repository Analysis", "URL Title Fetcher", "Dataset Upload & Fine-Tuning"])
17
 
18
+ # List of models for fine-tuning
19
+ available_models = [
20
+ "semeru/code-text-galeras-code-summarization-3k-deduped",
21
+ "semeru/code-code-InjectMutants",
22
+ "semeru/code-code-BugFixingSmall",
23
+ "semeru/code-code-GeneratingAssertsRaw",
24
+ "deepseek-ai/DeepSeek-Prover-V1"
25
+ ]
26
+
27
  # GitHub Repository Analysis
28
  if app_mode == "GitHub Repository Analysis":
29
  st.header("GitHub Repository Analysis")
 
61
  elif app_mode == "Dataset Upload & Fine-Tuning":
62
  st.header("Dataset Upload & Fine-Tuning")
63
 
64
+ # Model selection for fine-tuning
65
+ model_choice = st.selectbox("Choose Model for Fine-Tuning", available_models)
66
+
67
+ # Upload a CSV file for fine-tuning
68
  uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")
69
 
70
  if uploaded_file is not None:
71
+ st.write(f"Preparing fine-tuning for model: **{model_choice}**")
72
+ st.write("File successfully uploaded! Now starting fine-tuning process...")
73
+ fine_tune_model(uploaded_file, model_choice) # Assuming the fine_tune_model function handles fine-tuning
74
+
75
+ # Helper Functions for API Interaction
76
def analyze_github_repo(owner, repo):
    """Fetch summary metadata for a GitHub repository.

    Sends a GET request to ``https://api.github.com/repos/{owner}/{repo}``
    and condenses the JSON response into a small, display-ready dictionary.

    Args:
        owner: Account or organization name that owns the repository.
        repo: Repository name.

    Returns:
        A dict with the keys "Repository Name", "Owner", "Stars", "Forks",
        "Issues", "Language" and "Description", or ``None`` when the request
        fails (the failure is reported to the user with ``st.error``).

    Raises:
        KeyError: If the response JSON lacks one of the indexed fields.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import quote

    # Percent-encode each path segment so characters such as "/", "?" or "#"
    # in user-supplied input cannot retarget the request to another endpoint.
    safe_owner = quote(str(owner), safe="")
    safe_repo = quote(str(repo), safe="")
    api_url = f"https://api.github.com/repos/{safe_owner}/{safe_repo}"

    try:
        # Without a timeout a stalled connection would block the app forever;
        # a timeout surfaces as a RequestException and is handled below.
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        repo_data = response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching GitHub repository: {e}")
        return None

    return {
        "Repository Name": repo_data['name'],
        "Owner": repo_data['owner']['login'],
        "Stars": repo_data['stargazers_count'],
        "Forks": repo_data['forks_count'],
        "Issues": repo_data['open_issues_count'],
        "Language": repo_data['language'],
        # The API sends "description": null for repositories without one, so
        # a plain .get() default would never apply; fall back on any falsy
        # value instead.
        "Description": repo_data.get('description') or 'No description available.',
    }
94
+
95
def fetch_url_title(url):
    """Fetch a webpage and return the text of its ``<title>`` element.

    Args:
        url: Address of the page to download.

    Returns:
        The title text with HTML entities decoded and surrounding whitespace
        removed, or ``None`` when the request fails, the server answers with
        a status other than 200, or the page has no ``<title>`` element. Each
        failure is reported to the user with ``st.error``.
    """
    # Local imports keep this helper self-contained.
    import re
    from html import unescape

    try:
        # Without a timeout a stalled connection would block the app forever;
        # a timeout surfaces as a RequestException and is handled below.
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None

    if response.status_code != 200:
        st.error(f"Failed to fetch URL: {response.status_code}")
        return None

    # Case-insensitive and attribute-tolerant (<TITLE>, <title lang="en">);
    # DOTALL lets the title span several lines. Slicing by str.find() would
    # return an arbitrary chunk of the page when either tag is missing.
    match = re.search(
        r"<title\b[^>]*>(.*?)</title>",
        response.text,
        re.IGNORECASE | re.DOTALL,
    )
    if match is None:
        st.error("Failed to fetch URL: no <title> element found")
        return None

    # Title content may contain entities such as &amp; that need decoding.
    return unescape(match.group(1)).strip()