Santosh committed on
Commit 616d667
1 Parent(s): 2ccb279

fixed things

Files changed (2)
  1. app.py +372 -87
  2. datasetcards_new.parquet +3 -0
app.py CHANGED
@@ -1,46 +1,46 @@
  # import gradio as gr
  # import polars as pl

- # # Paths or HF Hub URLs for Parquet files
- # RICH_PARQUET_PATH = "all_rich_dataset_cards.parquet"
- # MISSING_PARQUET_PATH = "all_minimal_dataset_cards.parquet"
+ # # Path for the combined Parquet file
+ # COMBINED_PARQUET_PATH = "datasetcards.parquet"

  # ROWS_PER_PAGE = 50

- # # Lazy load datasets
- # lazy_rich = pl.scan_parquet(RICH_PARQUET_PATH)
- # lazy_missing = pl.scan_parquet(MISSING_PARQUET_PATH)
-
- # current_lazy_df = lazy_missing  # Default dataset
+ # # Lazy load dataset
+ # lazy_df = pl.scan_parquet(COMBINED_PARQUET_PATH)
+ # lazy_df = lazy_df.sort(
+ #     by=["downloads", "last_modified"],
+ #     descending=[True, True]
+ # )

  # # Helper function to fetch a page
  # def get_page(lazy_df: pl.LazyFrame, page: int, column: str = None, query: str = ""):
  #     filtered_df = lazy_df
  #     if column and query:
  #         query_lower = query.lower().strip()
- #         # Case-insensitive search
  #         filtered_df = filtered_df.with_columns([
  #             pl.col(column).cast(pl.Utf8).str.to_lowercase().alias(column)
  #         ]).filter(pl.col(column).str.contains(query_lower, literal=False))
  #     start = page * ROWS_PER_PAGE
  #     page_df = filtered_df.slice(start, ROWS_PER_PAGE).collect().to_pandas()
+
+ #     # Replace NaN/None with empty string for display
+ #     page_df = page_df.fillna("")
+
  #     total_rows = filtered_df.collect().height
  #     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1
  #     return page_df, total_pages

+
  # # Initialize first page
- # initial_df, total_pages = get_page(current_lazy_df, 0)
+ # initial_df, total_pages = get_page(lazy_df, 0)
  # columns = list(initial_df.columns)

  # with gr.Blocks() as demo:
  #     gr.Markdown("## Dataset Insight Portal")
-
- #     # Dataset selection
- #     dataset_select = gr.Dropdown(
- #         choices=["DatasetCards rich in information", "DatasetCards missing information"],
- #         value="DatasetCards missing information",
- #         label="Select Dataset"
- #     )
+ #     gr.Markdown("This space allows you to explore the dataset of DatasetCards.<br>"
+ #                 "You can navigate pages, search within columns, and inspect the dataset easily.<br>"
+ #     )

  #     # Pagination controls
  #     with gr.Row():
@@ -63,17 +63,7 @@
  #         reset_btn = gr.Button("Reset", elem_id="small-btn")

  #     # --- Functions ---
- #     def load_dataset(dataset_choice):
- #         global current_lazy_df
- #         current_lazy_df = lazy_rich if dataset_choice == "DatasetCards rich in information" else lazy_missing
- #         initial_df, total_pages = get_page(current_lazy_df, 0)
- #         columns = list(initial_df.columns)
- #         return (
- #             gr.update(value=initial_df, headers=columns),
- #             f"Total Pages: {total_pages}",
- #             0,
- #             gr.update(choices=columns, value=columns[0])
- #         )
+ #     current_lazy_df = lazy_df  # single dataset

  #     def next_page_func(page, column, query):
  #         page += 1
@@ -98,7 +88,6 @@
  #         return page_df, f"Total Pages: {total_pages}", 0

  #     # --- Event Listeners ---
- #     dataset_select.change(load_dataset, dataset_select, [data_table, total_pages_display, page_number, col_dropdown])
  #     next_btn.click(next_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
  #     prev_btn.click(prev_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
  #     search_btn.click(search_func, [col_dropdown, search_text], [data_table, total_pages_display, page_number])
@@ -107,90 +96,386 @@
  # demo.launch()


+ # import gradio as gr
+ # import polars as pl
+
+ # COMBINED_PARQUET_PATH = "datasetcards.parquet"
+ # ROWS_PER_PAGE = 50
+
+ # # Load dataset
+ # df = pl.read_parquet(COMBINED_PARQUET_PATH)  # eager DataFrame
+
+ # # Columns with dropdown instead of text search
+ # DROPDOWN_COLUMNS = ["reason", "category", "field", "keyword"]
+
+ # # Get unique values for the dropdown columns
+ # unique_values = {
+ #     col: sorted(df[col].drop_nulls().unique().to_list()) for col in DROPDOWN_COLUMNS
+ # }
+
+ # # Get page helper
+ # def get_page(df, page, column, query):
+ #     filtered_df = df
+
+ #     if column and query:
+ #         if column in DROPDOWN_COLUMNS:
+ #             # Exact match from dropdown
+ #             filtered_df = filtered_df.filter(pl.col(column) == query)
+ #         else:
+ #             # Text search
+ #             q = query.lower().strip()
+ #             filtered_df = (
+ #                 filtered_df.with_columns([
+ #                     pl.col(column).str.to_lowercase().alias(column)
+ #                 ])
+ #                 .filter(pl.col(column).str.contains(q, literal=False))
+ #             )
+
+ #     start = page * ROWS_PER_PAGE
+ #     page_df = filtered_df[start:start + ROWS_PER_PAGE].to_pandas().fillna("")
+ #     total_rows = filtered_df.height
+ #     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1 if total_rows > 0 else 1
+
+ #     return page_df, total_pages
+
+
+ # # Initial page
+ # initial_df, total_pages = get_page(df, 0, None, "")
+ # columns = list(initial_df.columns)
+
+ # # Build Gradio app
+ # with gr.Blocks() as demo:
+ #     gr.Markdown("## Dataset Insight Portal")
+ #     gr.Markdown(
+ #         "This space allows you to explore the dataset of DatasetCards.<br>"
+ #         "You can navigate pages, search within columns, and inspect the dataset easily.<br>"
+ #     )
+
+ #     with gr.Row():
+ #         prev_btn = gr.Button("Previous")
+ #         next_btn = gr.Button("Next")
+ #         page_number = gr.Number(value=0, label="Page", precision=0)
+ #         total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")
+
+ #     data_table = gr.Dataframe(
+ #         value=initial_df,
+ #         headers=columns,
+ #         datatype="str",
+ #         interactive=False,
+ #         row_count=ROWS_PER_PAGE,
+ #     )
+
+ #     with gr.Row():
+ #         col_dropdown = gr.Dropdown(choices=columns, label="Column to Search")
+ #         search_text = gr.Textbox(label="Search Text")
+ #         search_dropdown = gr.Dropdown(choices=[], label="Select Value", visible=False)
+ #         search_btn = gr.Button("Search")
+ #         reset_btn = gr.Button("Reset")
+
+ #     # Show dropdown only for certain columns
+ #     def update_search_input(column):
+ #         if column in DROPDOWN_COLUMNS:
+ #             return gr.update(choices=unique_values[column], visible=True), gr.update(visible=False)
+ #         else:
+ #             return gr.update(visible=False), gr.update(visible=True)
+
+ #     col_dropdown.change(update_search_input, col_dropdown, [search_dropdown, search_text])
+
+ #     # Search function
+ #     def search_func(page, column, txt, ddl):
+ #         query = ddl if column in DROPDOWN_COLUMNS else txt
+ #         page_df, total_pages = get_page(df, page, column, query)
+ #         return page_df, f"Total Pages: {total_pages}", 0
+
+ #     def next_page(page, column, txt, ddl):
+ #         page += 1
+ #         query = ddl if column in DROPDOWN_COLUMNS else txt
+ #         page_df, total_pages = get_page(df, page, column, query)
+ #         if page >= total_pages:
+ #             page = total_pages - 1
+ #             page_df, total_pages = get_page(df, page, column, query)
+ #         return page_df, f"Total Pages: {total_pages}", page
+
+ #     def prev_page(page, column, txt, ddl):
+ #         page = max(0, page - 1)
+ #         query = ddl if column in DROPDOWN_COLUMNS else txt
+ #         page_df, total_pages = get_page(df, page, column, query)
+ #         return page_df, f"Total Pages: {total_pages}", page
+
+ #     def reset_func():
+ #         page_df, total_pages = get_page(df, 0, None, "")
+ #         return page_df, f"Total Pages: {total_pages}", 0, "", ""
+
+ #     # Wire events
+ #     inputs = [page_number, col_dropdown, search_text, search_dropdown]
+ #     outputs = [data_table, total_pages_display, page_number]
+
+ #     search_btn.click(search_func, inputs, outputs)
+ #     next_btn.click(next_page, inputs, outputs)
+ #     prev_btn.click(prev_page, inputs, outputs)
+ #     reset_btn.click(reset_func, [], outputs + [search_text, search_dropdown])
+
+ # demo.launch()
+
  import gradio as gr
  import polars as pl
+ from huggingface_hub import HfApi
+ import re
+ # --- Hugging Face Org ---
+ org_name = "hugging-science"
+ api = HfApi()

- # Path for the combined Parquet file
- COMBINED_PARQUET_PATH = "datasetcards.parquet"
+ def fetch_members():
+     members = api.list_organization_members(org_name)
+     return [member.username for member in members]

- ROWS_PER_PAGE = 50
+ member_list = fetch_members()

- # Lazy load dataset
- lazy_df = pl.scan_parquet(COMBINED_PARQUET_PATH)
+ # --- Dataset ---
+ COMBINED_PARQUET_PATH = "datasetcards_new.parquet"
+ UPDATED_PARQUET_PATH = "datasetcards_new.parquet"
+ ROWS_PER_PAGE = 50

- # Helper function to fetch a page
- def get_page(lazy_df: pl.LazyFrame, page: int, column: str = None, query: str = ""):
-     filtered_df = lazy_df
+ # df = pl.read_parquet(COMBINED_PARQUET_PATH)
+ df = pl.read_parquet(COMBINED_PARQUET_PATH)
+ df = df.with_columns([
+     pl.lit("todo").alias("status"),
+     pl.lit("").alias("assigned_to")
+ ]).sort(by=["downloads", "last_modified", "usedStorage"], descending=[True, True, True])
+
+ if "reason" in df.columns:
+     df = df.with_columns([
+         pl.Series(
+             "reason",
+             ["short description" if x and "short description" in x.lower() else (x if x is not None else "") for x in df["reason"]]
+         )
+     ])
+
+
+
+
+ # Add editable columns if missing
+ for col in ["assigned_to", "status"]:
+     if col not in df.columns:
+         default_val = "" if col == "assigned_to" else "todo"
+         df = df.with_columns(pl.lit(default_val).alias(col))
+     else:
+         # Fill nulls with default
+         default_val = "" if col == "assigned_to" else "todo"
+         df = df.with_columns(pl.col(col).fill_null(default_val))
+
+ # --- Columns ---
+ DROPDOWN_COLUMNS = ["reason", "category", "field", "keyword", "assigned_to", "status"]
+ STATUS_OPTIONS = ["todo", "inprogress", "PR submitted", "PR merged"]
+
+ # Prepare unique values for dropdown search
+ unique_values = {col: sorted(df[col].drop_nulls().unique().to_list()) for col in DROPDOWN_COLUMNS}
+ unique_values['assigned_to'] = sorted(member_list)
+ unique_values['status'] = STATUS_OPTIONS
+
+ # --- Helper to get page ---
+ def get_page(df, page, column=None, query=None):
+     filtered_df = df
      if column and query:
-         query_lower = query.lower().strip()
-         # Case-insensitive search
-         filtered_df = filtered_df.with_columns([
-             pl.col(column).cast(pl.Utf8).str.to_lowercase().alias(column)
-         ]).filter(pl.col(column).str.contains(query_lower, literal=False))
+         if column in DROPDOWN_COLUMNS:
+             filtered_df = filtered_df.filter(pl.col(column) == query)
+         else:
+             q = query.lower().strip()
+             filtered_df = (
+                 filtered_df.with_columns([pl.col(column).str.to_lowercase().alias(column)])
+                 .filter(pl.col(column).str.contains(q, literal=False))
+             )
      start = page * ROWS_PER_PAGE
-     page_df = filtered_df.slice(start, ROWS_PER_PAGE).collect().to_pandas()
-     total_rows = filtered_df.collect().height
-     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1
+     page_df = filtered_df[start:start + ROWS_PER_PAGE].to_pandas().fillna("")
+     total_rows = filtered_df.height
+     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1 if total_rows > 0 else 1
      return page_df, total_pages

- # Initialize first page
- initial_df, total_pages = get_page(lazy_df, 0)
+ initial_df, total_pages = get_page(df, 0)
  columns = list(initial_df.columns)

  with gr.Blocks() as demo:
-     gr.Markdown("## Dataset Insight Portal")
-     gr.Markdown("This space allows you to explore the combined dataset of DatasetCards. "
-                 "You can navigate pages, search within columns, and inspect the dataset easily.")
-
-     # Pagination controls
+     gr.Markdown("""
+     # Dataset Insight Portal
+
+     Welcome! This portal helps you explore and manage datasets from our Hugging Face organization.
+
+     ## What is this space for?
+     This space provides a table of datasets along with metadata. You can:
+     - Browse datasets with pagination.
+     - Search datasets by various fields.
+     - Assign responsibility for reviewing datasets (`assigned_to`).
+     - Track progress using `status`.
+
+     ## Why the table?
+     The table gives a structured view of all datasets, making it easy to sort, filter, and update information for each dataset.
+
+     ## What does the table contain?
+     Each row represents a dataset. Columns include:
+     - **dataset_id**: Unique identifier of the dataset.
+     - **dataset_url**: Link to the dataset page on Hugging Face.
+     - **downloads**: Number of downloads.
+     - **author**: Dataset author.
+     - **license**: License type.
+     - **tags**: Tags describing the dataset. Obtained from the dataset card.
+     - **task_categories**: Categories of tasks the dataset is useful for. Obtained from the dataset card.
+     - **last_modified**: Date of last update.
+     - **field, keyword**: Metadata columns describing dataset purpose based on heuristics. Use `field` and `keyword` to filter for science-based datasets.
+     - **category**: Category of the dataset (`rich` means it is a good dataset card; `minimal` means it needs improvement for the reasons below).
+     - **reason**: Reason why the dataset is classified as `minimal`. Options: `Failed to load card`, `No metadata and no description`, `No metadata and has description`, `Short description`.
+     - **usedStorage**: Storage used by the dataset (bytes).
+     - **assigned_to**: Person responsible for the dataset (editable).
+     - **status**: Progress status (editable). Options: `todo`, `inprogress`, `PR submitted`, `PR merged`.
+
+     ## How to use search
+     - Select a **column** from the dropdown.
+     - If the column is textual, type your query in the text box.
+     - If the column is a dropdown (like `assigned_to` or `status`), select the value from the dropdown.
+     - Click **Search** to filter the table.
+
+     ## How to add or update `assigned_to` and `status`
+     1. First, search for the **dataset_id**.
+     2. Then, select the **dataset_id** from the dropdown below the table.
+     3. Choose the person responsible in **Assigned To**. If you are a member of the organization, your username should appear in the list. If it does not, refresh and try again.
+     4. Select the current status in **Status**.
+     5. Click **Save Changes** to update the table and persist the changes.
+     6. Use **Refresh All** to reload the table and the latest members list.
+
+     This portal makes it easy to keep track of dataset reviews, assignments, and progress all in one place.
+     """)
+
+     # --- Pagination controls ---
      with gr.Row():
-         prev_btn = gr.Button("Previous", elem_id="small-btn")
-         next_btn = gr.Button("Next", elem_id="small-btn")
+         prev_btn = gr.Button("Previous")
+         next_btn = gr.Button("Next")
          page_number = gr.Number(value=0, label="Page", precision=0)
          total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")

-     # Data table
+     # --- Data table ---
      data_table = gr.Dataframe(
-         value=initial_df, headers=columns, datatype="str",
-         interactive=False, row_count=ROWS_PER_PAGE
+         value=initial_df,
+         headers=columns,
+         datatype="str",
+         interactive=False,
+         row_count=ROWS_PER_PAGE
      )

-     # Column search
+     # --- Search controls ---
      with gr.Row():
-         col_dropdown = gr.Dropdown(choices=columns, label="Column")
-         search_text = gr.Textbox(label="Search")
-         search_btn = gr.Button("Search", elem_id="small-btn")
-         reset_btn = gr.Button("Reset", elem_id="small-btn")
-
-     # --- Functions ---
-     current_lazy_df = lazy_df  # single dataset
-
-     def next_page_func(page, column, query):
+         col_dropdown = gr.Dropdown(choices=columns, label="Column to Search")
+         search_text = gr.Textbox(label="Search Text")
+         search_dropdown = gr.Dropdown(choices=[], label="Select Value", visible=False)
+         search_btn = gr.Button("Search")
+         reset_btn = gr.Button("Reset")
+
+     # --- Dataset selection & editable fields ---
+     selected_dataset_id = gr.Dropdown(label="Select dataset_id", choices=initial_df['dataset_id'].tolist())
+     assigned_to_input = gr.Dropdown(choices=member_list, label="Assigned To")
+     # status_input = gr.Dropdown(choices=STATUS_OPTIONS, label="Status")
+     status_input = gr.Dropdown(choices=STATUS_OPTIONS, label="Status", value="todo")
+
+
+     save_btn = gr.Button("Save Changes")
+     refresh_btn = gr.Button("Refresh All")
+     save_message = gr.Textbox(label="Save Status", interactive=False)
+
+     # --- Update search input depending on column ---
+     def update_search_input(column):
+         if column in DROPDOWN_COLUMNS:
+             return gr.update(choices=unique_values[column], visible=True), gr.update(visible=False)
+         else:
+             return gr.update(visible=False), gr.update(visible=True)
+
+     col_dropdown.change(update_search_input, col_dropdown, [search_dropdown, search_text])
+
+     # --- Prefill editable fields ---
+     def prefill_fields(dataset_id):
+         if not dataset_id:
+             return "", "todo"
+         dataset_id = str(dataset_id)
+         filtered = [row for row in df.to_dicts() if str(row.get("dataset_id")) == dataset_id]
+         if not filtered:
+             return "", "todo"
+         row = filtered[0]
+         return row.get("assigned_to", ""), row.get("status", "todo")
+
+     selected_dataset_id.change(prefill_fields, selected_dataset_id, [assigned_to_input, status_input])
+
+     # --- Search function ---
+     def search_func(page, column, txt, ddl):
+         query = ddl if column in DROPDOWN_COLUMNS else txt
+         page_df, total_pages = get_page(df, page, column, query)
+         return page_df, f"Total Pages: {total_pages}", 0, gr.update(choices=page_df['dataset_id'].tolist())
+
+     # --- Pagination functions ---
+     def next_page(page, column, txt, ddl):
          page += 1
-         page_df, total_pages = get_page(current_lazy_df, page, column, query)
+         query = ddl if column in DROPDOWN_COLUMNS else txt
+         page_df, total_pages = get_page(df, page, column, query)
          if page >= total_pages:
              page = total_pages - 1
-             page_df, total_pages = get_page(current_lazy_df, page, column, query)
-         return page_df, f"Total Pages: {total_pages}", page
+             page_df, total_pages = get_page(df, page, column, query)
+         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist())

-     def prev_page_func(page, column, query):
-         page -= 1
-         page = max(0, page)
-         page_df, total_pages = get_page(current_lazy_df, page, column, query)
-         return page_df, f"Total Pages: {total_pages}", page
-
-     def search_func(column, query):
-         page_df, total_pages = get_page(current_lazy_df, 0, column, query)
-         return page_df, f"Total Pages: {total_pages}", 0
+     def prev_page(page, column, txt, ddl):
+         page = max(0, page - 1)
+         query = ddl if column in DROPDOWN_COLUMNS else txt
+         page_df, total_pages = get_page(df, page, column, query)
+         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist())

      def reset_func():
-         page_df, total_pages = get_page(current_lazy_df, 0)
-         return page_df, f"Total Pages: {total_pages}", 0
-
-     # --- Event Listeners ---
-     next_btn.click(next_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
-     prev_btn.click(prev_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
-     search_btn.click(search_func, [col_dropdown, search_text], [data_table, total_pages_display, page_number])
-     reset_btn.click(reset_func, [], [data_table, total_pages_display, page_number])
+         page_df, total_pages = get_page(df, 0)
+         return page_df, f"Total Pages: {total_pages}", 0, gr.update(choices=page_df['dataset_id'].tolist())
+
+     # --- Save changes & refresh ---
+     def save_changes(dataset_id, assigned_to_val, status_val, page_val, col, txt, ddl):
+         global df
+         if not dataset_id:
+             return gr.update(value="Please select a row first."), None, None, None
+         df = df.with_columns([
+             pl.when(pl.col("dataset_id") == dataset_id).then(pl.lit(assigned_to_val)).otherwise(pl.col("assigned_to")).alias("assigned_to"),
+             pl.when(pl.col("dataset_id") == dataset_id).then(pl.lit(status_val)).otherwise(pl.col("status")).alias("status")
+         ])
+         df.write_parquet(UPDATED_PARQUET_PATH)
+         page_df, total_pages = get_page(df, page_val, col, txt if col not in DROPDOWN_COLUMNS else ddl)
+         return (
+             gr.update(value=f"Saved changes for dataset_id: {dataset_id}"),
+             page_df,
+             gr.update(choices=page_df['dataset_id'].tolist()),
+             f"Total Pages: {total_pages}"
+         )
+
+     # --- Refresh All: table + members ---
+     def refresh_all(page, column, txt, ddl):
+         global df, member_list, unique_values
+         # Refresh members
+         member_list = fetch_members()
+         unique_values['assigned_to'] = sorted(member_list)
+         # Refresh table
+         try:
+             df = pl.read_parquet(UPDATED_PARQUET_PATH)
+         except FileNotFoundError:
+             pass
+         page_df, total_pages = get_page(df, page, column, txt if column not in DROPDOWN_COLUMNS else ddl)
+         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist()), gr.update(choices=member_list)
+
+     # --- Wire buttons ---
+     inputs_search = [page_number, col_dropdown, search_text, search_dropdown]
+     outputs_search = [data_table, total_pages_display, page_number, selected_dataset_id]
+
+     search_btn.click(search_func, inputs_search, outputs_search)
+     next_btn.click(next_page, inputs_search, outputs_search)
+     prev_btn.click(prev_page, inputs_search, outputs_search)
+     reset_btn.click(reset_func, [], outputs_search)
+     save_btn.click(
+         save_changes,
+         [selected_dataset_id, assigned_to_input, status_input, page_number, col_dropdown, search_text, search_dropdown],
+         [save_message, data_table, selected_dataset_id, total_pages_display]
+     )
+     refresh_btn.click(
+         refresh_all,
+         inputs=[page_number, col_dropdown, search_text, search_dropdown],
+         outputs=[data_table, total_pages_display, page_number, selected_dataset_id, assigned_to_input]
+     )

  demo.launch()
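
The heart of the new editing flow in the hunk above is the conditional column update that `save_changes` applies before persisting the table with `write_parquet`. Below is a minimal, self-contained sketch of that Polars pattern; the toy rows and the output file name are illustrative assumptions, not values taken from the Space.

import polars as pl

# Toy stand-in for the real table; column names mirror the app above.
df = pl.DataFrame({
    "dataset_id": ["org/dataset-a", "org/dataset-b"],
    "assigned_to": ["", ""],
    "status": ["todo", "todo"],
})

# Hypothetical selection made in the UI.
dataset_id, assignee, status = "org/dataset-a", "some-user", "inprogress"

# Same pattern as save_changes: rebuild the two editable columns,
# changing only the row whose dataset_id matches the selection.
df = df.with_columns([
    pl.when(pl.col("dataset_id") == dataset_id)
      .then(pl.lit(assignee)).otherwise(pl.col("assigned_to")).alias("assigned_to"),
    pl.when(pl.col("dataset_id") == dataset_id)
      .then(pl.lit(status)).otherwise(pl.col("status")).alias("status"),
])

df.write_parquet("datasetcards_demo.parquet")  # illustrative output path
print(df)

One design consequence to keep in mind: on a Space, `write_parquet` writes to the running container's filesystem, so saved assignments persist only for that instance unless the file is pushed back to the repo or persistent storage is attached.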
datasetcards_new.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0d3770a3024eaf459d5c12d2c4a9d0d5a5043660d0a15c062a387595602eacf
+ size 38347730
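
Since the Parquet file is committed as a Git LFS pointer, only its oid and size appear in the diff. A quick sanity check after pulling the actual file is sketched below; this is a hypothetical snippet, and the expected column names are taken from the app code above rather than verified against the file.

import polars as pl

# Assumes `git lfs pull` has materialized the real file, not just the pointer.
df = pl.read_parquet("datasetcards_new.parquet")

# Columns the app above relies on; treat this list as an assumption to verify.
expected = {"dataset_id", "downloads", "last_modified", "usedStorage",
            "reason", "category", "field", "keyword"}
missing = expected - set(df.columns)
print(f"{df.height} rows; missing columns: {sorted(missing) or 'none'}")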