Spaces:

Rogerjs
/

NeuroNarrative-Lite

Sleeping

App Files Files Community

Rogerjs committed on Dec 6, 2024

Commit

49be262

verified ·

1 Parent(s): 7fd8d2e

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -39

app.py CHANGED Viewed

@@ -6,28 +6,27 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
-# Load an open-source LLM model with no additional training
 model_name = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
     torch_dtype=torch.float16,
-    device_map="auto"  # Automatically selects CPU/GPU if available
 )
 def compute_band_power(psd, freqs, fmin, fmax):
-    """Compute mean band power in the given frequency range."""
     freq_mask = (freqs >= fmin) & (freqs <= fmax)
-    # Take the mean across channels and frequencies
     band_psd = psd[:, freq_mask].mean()
     return float(band_psd)
-def load_eeg_data(file_path):
     """
-    Load EEG data from a file.
-    If FIF file is detected, use MNE's read_raw_fif.
-    If CSV file is detected, load via pandas and create a RawArray.
     """
     _, file_ext = os.path.splitext(file_path)
     file_ext = file_ext.lower()
@@ -35,24 +34,33 @@ def load_eeg_data(file_path):
     if file_ext == '.fif':
         raw = mne.io.read_raw_fif(file_path, preload=True)
     elif file_ext == '.csv':
-        # Assume first column is 'time', and subsequent columns are channels
         df = pd.read_csv(file_path)
-        if 'time' not in df.columns:
-            raise ValueError("CSV must contain a 'time' column for timestamps.")
-        time = df['time'].values
-        data = df.drop(columns=['time']).values.T  # shape: (n_channels, n_samples)
-        # Estimate sampling frequency from time vector (assuming uniform)
-        # This is a simplistic approach: we take 1 / average time step.
-        # Make sure time is in seconds
-        if len(time) < 2:
-            raise ValueError("Not enough time points in CSV.")
-        sfreq = 1.0 / np.mean(np.diff(time))
-        # Create MNE Info
-        ch_names = list(df.columns)
-        ch_names.remove('time')
         ch_types = ['eeg'] * len(ch_names)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
@@ -62,31 +70,24 @@ def load_eeg_data(file_path):
     return raw
-def process_eeg(file):
-    # Load EEG data
-    raw = load_eeg_data(file.name)
-    # Compute PSD (Power Spectral Density) between 1 and 40 Hz
     psd, freqs = mne.time_frequency.psd_welch(raw, fmin=1, fmax=40)
-    # Compute simple band powers
     alpha_power = compute_band_power(psd, freqs, 8, 12)
     beta_power = compute_band_power(psd, freqs, 13, 30)
-    # Create a short summary of the extracted features
     data_summary = (
         f"Alpha power: {alpha_power:.3f}, Beta power: {beta_power:.3f}. "
         f"The EEG shows stable alpha rhythms and slightly elevated beta activity."
     )
-    # Prepare the prompt for the language model
     prompt = f"""You are a neuroscientist analyzing EEG features.
 Data Summary: {data_summary}
 Provide a concise, user-friendly interpretation of these findings in simple terms.
 """
-    # Generate the summary using the LLM
     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         inputs, max_length=200, do_sample=True, top_k=50, top_p=0.95
@@ -97,12 +98,18 @@ Provide a concise, user-friendly interpretation of these findings in simple term
 iface = gr.Interface(
     fn=process_eeg,
-    inputs=gr.File(label="Upload your EEG data (FIF or CSV)"),
     outputs="text",
-    title="NeuroNarrative-Lite: EEG Summary",
-    description=("Upload EEG data in FIF (MNE native) or CSV format. "
-                 "The system extracts basic EEG features and generates "
-                 "a human-readable summary using an open-source language model.")
 )
 if __name__ == "__main__":

 import torch
 import os
 model_name = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
     torch_dtype=torch.float16,
+    device_map="auto"
 )
 def compute_band_power(psd, freqs, fmin, fmax):
     """Return the mean of ``psd`` over the frequency band [fmin, fmax].

     A boolean mask is built from ``freqs`` (both bounds inclusive) and used
     to select entries along the second axis of ``psd``. A single mean is
     then taken over everything selected, and the result is returned as a
     plain Python float.
     """
     above_lower = freqs >= fmin
     below_upper = freqs <= fmax
     in_band = above_lower & below_upper
     selected = psd[:, in_band]
     return float(selected.mean())
+def load_eeg_data(file_path, default_sfreq=256.0, time_col='time'):
     """
+    Load EEG data from a file with flexible CSV handling.
+    - If FIF: Use read_raw_fif.
+    - If CSV:
+       * If `time_col` is present, use it as time.
+       * Otherwise, assume a default sfreq and treat all columns as channels.
     """
     _, file_ext = os.path.splitext(file_path)
     file_ext = file_ext.lower()
     if file_ext == '.fif':
         raw = mne.io.read_raw_fif(file_path, preload=True)
     elif file_ext == '.csv':
         df = pd.read_csv(file_path)
+        # Remove non-numeric columns except time_col
+        for col in df.columns:
+            if col != time_col:
+                # Drop non-numeric columns if any
+                if not pd.api.types.is_numeric_dtype(df[col]):
+                    df = df.drop(columns=[col])
+        if time_col in df.columns:
+            # Use the provided time column
+            time = df[time_col].values
+            data_df = df.drop(columns=[time_col])
+            if len(time) < 2:
+                raise ValueError("Not enough time points to estimate sampling frequency.")
+            sfreq = 1.0 / np.mean(np.diff(time))
+        else:
+            # No explicit time column, assume uniform sampling at default_sfreq
+            sfreq = default_sfreq
+            data_df = df
+        # Channels are all remaining columns
+        ch_names = list(data_df.columns)
+        data = data_df.values.T  # shape: (n_channels, n_samples)
+        # Create MNE info
         ch_types = ['eeg'] * len(ch_names)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
     return raw
+def process_eeg(file, default_sfreq, time_col):
+    raw = load_eeg_data(file.name, default_sfreq=float(default_sfreq), time_col=time_col)
     psd, freqs = mne.time_frequency.psd_welch(raw, fmin=1, fmax=40)
     alpha_power = compute_band_power(psd, freqs, 8, 12)
     beta_power = compute_band_power(psd, freqs, 13, 30)
     data_summary = (
         f"Alpha power: {alpha_power:.3f}, Beta power: {beta_power:.3f}. "
         f"The EEG shows stable alpha rhythms and slightly elevated beta activity."
     )
     prompt = f"""You are a neuroscientist analyzing EEG features.
 Data Summary: {data_summary}
 Provide a concise, user-friendly interpretation of these findings in simple terms.
 """
     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         inputs, max_length=200, do_sample=True, top_k=50, top_p=0.95
 iface = gr.Interface(
     fn=process_eeg,
+    inputs=[
+        gr.File(label="Upload your EEG data (FIF or CSV)"),
+        gr.Textbox(label="Default Sampling Frequency if no time column (Hz)", value="256"),
+        gr.Textbox(label="Time column name (if exists)", value="time")
+    ],
     outputs="text",
+    title="NeuroNarrative-Lite: EEG Summary (Flexible CSV Handling)",
+    description=(
+        "Upload EEG data in FIF or CSV format. "
+        "If CSV, either include a 'time' column or specify a default sampling frequency. "
+        "Non-numeric columns will be removed (except the chosen time column)."
+    )
 )
 if __name__ == "__main__":