ChingCL committed on
Commit
83afe02
·
verified ·
1 Parent(s): 4beff9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -49
app.py CHANGED
@@ -1,83 +1,62 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import re
4
- import json
5
 
6
- # 提取 JSON 格式中的文本部分
7
def extract_text_from_json(text):
    """Return the text payload of a JSON-encoded cell, or the cell itself.

    If ``text`` parses as a JSON object, the payload is looked up first under
    ``question.content`` and then under ``content``. In every other case
    (invalid JSON, a non-object document, missing keys, or a payload that is
    not a string) the original ``text`` is returned unchanged, so the result
    is a string whenever the input is.

    Args:
        text: Raw cell content as a string.

    Returns:
        The extracted payload string, or ``text`` when nothing usable is found.
    """
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return text

    if not isinstance(data, dict):
        return text

    # Guard the nested lookup: 'question' may be present without being an
    # object that carries a 'content' entry.
    question = data.get('question')
    if isinstance(question, dict) and isinstance(question.get('content'), str):
        return question['content']

    # Only hand back string payloads; a non-string value falls through to the
    # raw text instead of being returned as-is.
    if isinstance(data.get('content'), str):
        return data['content']

    return text
18
-
19
- # 檢查 $ 符號前後是否有空格
20
  def check_spacing_around_dollar(df):
21
  errors = []
22
  for i, row in df.iterrows():
23
  for col in df.columns:
24
- text = extract_text_from_json(str(row[col]))
25
- matches = re.finditer(r'\$\S+|\S+\$', text)
26
  for match in matches:
27
- if (match.start() > 0 and text[match.start() - 1] != ' ') or (match.end() < len(text) and text[match.end()] != ' '):
28
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
29
  return errors
30
 
31
- # 檢查 $ 符號之間是否有空格
32
  def check_spacing_between_dollars(df):
33
  errors = []
34
  for i, row in df.iterrows():
35
  for col in df.columns:
36
- text = extract_text_from_json(str(row[col]))
37
- matches = re.finditer(r'\$\S+?(?=\$)', text)
38
  for match in matches:
39
  if text[match.end()] != ' ' and text[match.start() - 1] != ' ':
40
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
41
  return errors
42
 
43
- # 檢查數字前後是否有 $ 符號
44
  def check_numbers_surrounded_by_dollar(df):
45
  errors = []
46
  for i, row in df.iterrows():
47
  for col in df.columns:
48
- text = extract_text_from_json(str(row[col]))
49
- matches = re.finditer(r'\b\d+\b', text)
50
  for match in matches:
51
- if not (match.start() > 0 and text[match.start() - 1] == '$' and match.end() < len(text) and text[match.end()] == '$'):
52
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
53
  return errors
54
 
55
  # 處理檔案並執行檢查
56
  def process_file(file):
57
- try:
58
- if file.name.endswith('.csv'):
59
- df = pd.read_csv(file.name)
60
- elif file.name.endswith('.xlsx'):
61
- df = pd.read_excel(file.name)
62
- else:
63
- return "只支持 CSV 和 XLSX 檔案"
64
-
65
- # 打印 DataFrame 以檢查內容
66
- print("讀取的 DataFrame:")
67
- print(df.head())
68
-
69
- # 執行檢查
70
- errors1 = check_spacing_around_dollar(df)
71
- errors2 = check_spacing_between_dollars(df)
72
- errors3 = check_numbers_surrounded_by_dollar(df)
73
-
74
- return {
75
- "第一個檢查": errors1,
76
- "第二個檢查": errors2,
77
- "第三個檢查": errors3
78
- }
79
- except Exception as e:
80
- return str(e)
81
 
82
  # Gradio 介面
83
  iface = gr.Interface(
 
1
  import gradio as gr
2
  import pandas as pd
3
  import re
 
4
 
5
+ # 第一個檢查:檢查$符號前後是否有空格
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def check_spacing_around_dollar(df):
7
  errors = []
8
  for i, row in df.iterrows():
9
  for col in df.columns:
10
+ text = str(row[col])
11
+ matches = list(re.finditer(r'(\s?\$\S*?\s?|\s?\S*?\$)', text))
12
  for match in matches:
13
+ if not (text[match.start() - 1] == ' ' and text[match.end()] == ' '):
14
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
15
  return errors
16
 
17
+ # 第二個檢查:檢查$符號之間是否有空格
18
  def check_spacing_between_dollars(df):
19
  errors = []
20
  for i, row in df.iterrows():
21
  for col in df.columns:
22
+ text = str(row[col])
23
+ matches = list(re.finditer(r'\$\S+?(?=\$)', text))
24
  for match in matches:
25
  if text[match.end()] != ' ' and text[match.start() - 1] != ' ':
26
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
27
  return errors
28
 
29
+ # 第三個檢查:檢查數字前後是否有$
30
  def check_numbers_surrounded_by_dollar(df):
31
  errors = []
32
  for i, row in df.iterrows():
33
  for col in df.columns:
34
+ text = str(row[col])
35
+ matches = list(re.finditer(r'\b\d+\b', text))
36
  for match in matches:
37
+ if not (text[match.start() - 1] == '$' and text[match.end()] == '$'):
38
  errors.append(f"行 {i+1} 列 '{col}': '{text}'")
39
  return errors
40
 
41
  # 處理檔案並執行檢查
42
  def process_file(file):
43
+ if file.name.endswith('.csv'):
44
+ df = pd.read_csv(file.name)
45
+ elif file.name.endswith('.xlsx'):
46
+ df = pd.read_excel(file.name)
47
+ else:
48
+ return "只支持 CSV 和 XLSX 檔案"
49
+
50
+ # 執行檢查
51
+ errors1 = check_spacing_around_dollar(df)
52
+ errors2 = check_spacing_between_dollars(df)
53
+ errors3 = check_numbers_surrounded_by_dollar(df)
54
+
55
+ return {
56
+ "第一個檢查": errors1,
57
+ "第二個檢查": errors2,
58
+ "第三個檢查": errors3
59
+ }
 
 
 
 
 
 
 
60
 
61
  # Gradio 介面
62
  iface = gr.Interface(