pendar02 committed on
Commit
b6f12ab
·
verified ·
1 Parent(s): e7b282d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -3
app.py CHANGED
@@ -82,6 +82,34 @@ def cleanup_model(model, tokenizer):
82
  except Exception:
83
  pass
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  @st.cache_data
86
  def process_excel(uploaded_file):
87
  """Process uploaded Excel file"""
@@ -95,6 +123,25 @@ def process_excel(uploaded_file):
95
  if missing_columns:
96
  st.error(f"Missing required columns: {', '.join(missing_columns)}")
97
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  return df[required_columns]
100
  except Exception as e:
@@ -318,10 +365,24 @@ def create_filter_controls(df, sort_column):
318
 
319
  def main():
320
  st.title("πŸ”¬ Biomedical Papers Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  # File upload section
323
  uploaded_file = st.file_uploader(
324
- "Upload Excel file containing papers",
325
  type=['xlsx', 'xls'],
326
  help="File must contain: Abstract, Article Title, Authors, Source Title, Publication Year, DOI"
327
  )
@@ -331,12 +392,17 @@ def main():
331
  question = ""
332
 
333
  if uploaded_file is not None:
334
- # Process Excel file
335
  if st.session_state.processed_data is None:
336
  with st.spinner("Processing file..."):
337
  df = process_excel(uploaded_file)
338
  if df is not None:
339
- st.session_state.processed_data = df.dropna(subset=["Abstract"])
 
 
 
 
 
340
 
341
  if st.session_state.processed_data is not None:
342
  df = st.session_state.processed_data
 
82
  except Exception:
83
  pass
84
 
85
def validate_excel_structure(df, min_year=1900, max_year=None):
    """Validate the structure and content of the uploaded papers table.

    Checks, in order: that the table has at least one row, that every
    non-empty abstract has at least 50 characters, that publication years
    are integers within ``[min_year, max_year]``, and that every non-empty
    DOI contains something shaped like ``10.<registrant>/<suffix>``.

    Missing (NaN) abstracts and DOIs are not reported here; they are simply
    skipped by the respective checks.

    Args:
        df: DataFrame with at least the columns ``Abstract``,
            ``Publication Year`` and ``DOI``.
        min_year: Earliest publication year accepted (inclusive).
        max_year: Latest publication year accepted (inclusive). ``None``
            means "next calendar year", so the bound does not go stale.

    Returns:
        A ``(is_valid, messages)`` tuple. ``is_valid`` is ``True`` when no
        problem was found; ``messages`` lists one human-readable string per
        failed check.

    Side effects:
        When the ``Publication Year`` column converts cleanly, it is
        replaced in ``df`` by its integer version (same as before this
        rewrite, so callers keep receiving normalised years).
    """
    from datetime import date

    validation_messages = []

    # Check for minimum content
    if len(df) == 0:
        validation_messages.append("File contains no data")
        return False, validation_messages

    # Check abstract length. Drop NaN first and cast to str so the `.str`
    # accessor cannot raise on an all-empty (float dtype) column.
    abstracts = df['Abstract'].dropna().astype(str)
    if (abstracts.str.len() < 50).any():
        validation_messages.append("Some abstracts are too short (less than 50 characters)")

    # Check publication year format
    if max_year is None:
        # Allow next year: journals routinely assign issues ahead of time.
        max_year = date.today().year + 1
    try:
        years = df['Publication Year'].astype(int)
    except (ValueError, TypeError, OverflowError):
        # NaN, None, non-numeric text or out-of-range values cannot be cast.
        validation_messages.append("Invalid format in Publication Year column")
    else:
        df['Publication Year'] = years
        if years.min() < min_year or years.max() > max_year:
            validation_messages.append("Invalid publication years detected")

    # Check if DOIs are in valid format (basic, unanchored check).
    # Missing DOIs are skipped, which matches the previous `na=True` intent
    # without requiring the column to hold strings.
    dois = df['DOI'].dropna().astype(str)
    if not dois.str.contains(r'10\.\d{4,}/.+').all():
        validation_messages.append("Some DOIs are in invalid format")

    return len(validation_messages) == 0, validation_messages
111
+
112
+
113
  @st.cache_data
114
  def process_excel(uploaded_file):
115
  """Process uploaded Excel file"""
 
123
  if missing_columns:
124
  st.error(f"Missing required columns: {', '.join(missing_columns)}")
125
  return None
126
+
127
+ # Check number of papers
128
+ if len(df) > 5:
129
+ st.error("❌ Your file contains more than 5 papers. Please upload a file with maximum 5 papers.")
130
+ return None
131
+
132
+ # Validate structure and content
133
+ is_valid, messages = validate_excel_structure(df)
134
+ if not is_valid:
135
+ for msg in messages:
136
+ st.error(f"❌ {msg}")
137
+ return None
138
+
139
+
140
+ # Check for empty required fields
141
+ for col in required_columns:
142
+ if df[col].isna().any():
143
+ st.warning(f"⚠️ Some entries in '{col}' column are empty. This might affect the analysis.")
144
+
145
 
146
  return df[required_columns]
147
  except Exception as e:
 
365
 
366
  def main():
367
  st.title("πŸ”¬ Biomedical Papers Analysis")
368
+
369
+ st.info("""
370
+ **πŸ“‹ File Upload Requirements:**
371
+ - Excel file (.xlsx or .xls) with **maximum 5 papers**
372
+ - Must contain these columns:
373
+ β€’ Abstract
374
+ β€’ Article Title
375
+ β€’ Authors
376
+ β€’ Source Title
377
+ β€’ Publication Year
378
+ β€’ DOI
379
+ β€’ Times Cited, All Databases
380
+ """)
381
+
382
 
383
  # File upload section
384
  uploaded_file = st.file_uploader(
385
+ "Upload Excel file containing papers (max 5 papers)",
386
  type=['xlsx', 'xls'],
387
  help="File must contain: Abstract, Article Title, Authors, Source Title, Publication Year, DOI"
388
  )
 
392
  question = ""
393
 
394
  if uploaded_file is not None:
395
+ # Process Excel file
396
  if st.session_state.processed_data is None:
397
  with st.spinner("Processing file..."):
398
  df = process_excel(uploaded_file)
399
  if df is not None:
400
+ df = df.dropna(subset=["Abstract"])
401
+ if len(df) > 0:
402
+ st.session_state.processed_data = df
403
+ st.success(f"βœ… Successfully loaded {len(df)} papers with abstracts")
404
+ else:
405
+ st.error("❌ No valid papers found after processing. Please check your file.")
406
 
407
  if st.session_state.processed_data is not None:
408
  df = st.session_state.processed_data