KevinHuSh commited on
Commit
41c7a59
·
1 Parent(s): 62e78ef

init README of deepdoc, add picture processer. (#71)

Browse files

* init README of deepdoc, add picture processer.

* add resume parsing

Files changed (42) hide show
  1. .gitignore +1 -1
  2. api/apps/conversation_app.py +1 -0
  3. api/apps/dialog_app.py +0 -2
  4. api/apps/document_app.py +3 -1
  5. api/db/db_models.py +1 -0
  6. api/db/services/llm_service.py +4 -4
  7. api/db/services/task_service.py +20 -5
  8. api/flask_session/2029240f6d1128be89ddc32729463129 +0 -0
  9. api/utils/file_utils.py +5 -1
  10. deepdoc/README.md +82 -0
  11. deepdoc/README_zh.md +1 -0
  12. deepdoc/parser/__init__.py +2 -217
  13. deepdoc/parser/ppt_parser.py +52 -0
  14. deepdoc/parser/resume/__init__.py +52 -0
  15. deepdoc/parser/resume/entities/__init__.py +0 -0
  16. deepdoc/parser/resume/entities/corporations.py +80 -0
  17. deepdoc/parser/resume/entities/degrees.py +24 -0
  18. deepdoc/parser/resume/entities/industries.py +692 -0
  19. deepdoc/parser/resume/entities/regions.py +762 -0
  20. deepdoc/parser/resume/entities/res/corp.tks.freq.json +65 -0
  21. deepdoc/parser/resume/entities/res/corp_baike_len.csv +0 -0
  22. deepdoc/parser/resume/entities/res/corp_tag.json +0 -0
  23. deepdoc/parser/resume/entities/res/good_corp.json +911 -0
  24. deepdoc/parser/resume/entities/res/good_sch.json +595 -0
  25. deepdoc/parser/resume/entities/res/school.rank.csv +1627 -0
  26. deepdoc/parser/resume/entities/res/schools.csv +0 -0
  27. deepdoc/parser/resume/entities/schools.py +62 -0
  28. deepdoc/parser/resume/step_one.py +174 -0
  29. deepdoc/parser/resume/step_two.py +580 -0
  30. deepdoc/vision/ocr.py +5 -1
  31. rag/app/book.py +3 -3
  32. rag/app/laws.py +3 -3
  33. rag/app/manual.py +3 -4
  34. rag/app/naive.py +3 -4
  35. rag/app/paper.py +3 -4
  36. rag/app/picture.py +56 -0
  37. rag/app/presentation.py +4 -36
  38. rag/app/qa.py +3 -3
  39. rag/app/table.py +3 -4
  40. rag/llm/cv_model.py +39 -9
  41. rag/nlp/__init__.py +216 -0
  42. rag/svr/task_executor.py +50 -48
.gitignore CHANGED
@@ -6,7 +6,7 @@ __pycache__/
6
  hudet/
7
  cv/
8
  layout_app.py
9
- resume/
10
 
11
  # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
12
  # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 
6
  hudet/
7
  cv/
8
  layout_app.py
9
+ api/flask_session
10
 
11
  # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
12
  # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
api/apps/conversation_app.py CHANGED
@@ -163,6 +163,7 @@ def completion():
163
  del req["conversation_id"]
164
  del req["messages"]
165
  ans = chat(dia, msg, **req)
 
166
  conv.reference.append(ans["reference"])
167
  conv.message.append({"role": "assistant", "content": ans["answer"]})
168
  ConversationService.update_by_id(conv.id, conv.to_dict())
 
163
  del req["conversation_id"]
164
  del req["messages"]
165
  ans = chat(dia, msg, **req)
166
+ if not conv.reference: conv.reference = []
167
  conv.reference.append(ans["reference"])
168
  conv.message.append({"role": "assistant", "content": ans["answer"]})
169
  ConversationService.update_by_id(conv.id, conv.to_dict())
api/apps/dialog_app.py CHANGED
@@ -32,7 +32,6 @@ def set_dialog():
32
  dialog_id = req.get("dialog_id")
33
  name = req.get("name", "New Dialog")
34
  description = req.get("description", "A helpful Dialog")
35
- language = req.get("language", "Chinese")
36
  top_n = req.get("top_n", 6)
37
  similarity_threshold = req.get("similarity_threshold", 0.1)
38
  vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
@@ -80,7 +79,6 @@ def set_dialog():
80
  "name": name,
81
  "kb_ids": req["kb_ids"],
82
  "description": description,
83
- "language": language,
84
  "llm_id": llm_id,
85
  "llm_setting": llm_setting,
86
  "prompt_config": prompt_config,
 
32
  dialog_id = req.get("dialog_id")
33
  name = req.get("name", "New Dialog")
34
  description = req.get("description", "A helpful Dialog")
 
35
  top_n = req.get("top_n", 6)
36
  similarity_threshold = req.get("similarity_threshold", 0.1)
37
  vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
 
79
  "name": name,
80
  "kb_ids": req["kb_ids"],
81
  "description": description,
 
82
  "llm_id": llm_id,
83
  "llm_setting": llm_setting,
84
  "prompt_config": prompt_config,
api/apps/document_app.py CHANGED
@@ -272,7 +272,9 @@ def get(doc_id):
272
  response = flask.make_response(MINIO.get(doc.kb_id, doc.location))
273
  ext = re.search(r"\.([^.]+)$", doc.name)
274
  if ext:
275
- response.headers.set('Content-Type', 'application/%s'%ext.group(1))
 
 
276
  return response
277
  except Exception as e:
278
  return server_error_response(e)
 
272
  response = flask.make_response(MINIO.get(doc.kb_id, doc.location))
273
  ext = re.search(r"\.([^.]+)$", doc.name)
274
  if ext:
275
+ if doc.type == FileType.VISUAL.value:
276
+ response.headers.set('Content-Type', 'image/%s'%ext.group(1))
277
+ else: response.headers.set('Content-Type', 'application/%s'%ext.group(1))
278
  return response
279
  except Exception as e:
280
  return server_error_response(e)
api/db/db_models.py CHANGED
@@ -464,6 +464,7 @@ class Knowledgebase(DataBaseModel):
464
  avatar = TextField(null=True, help_text="avatar base64 string")
465
  tenant_id = CharField(max_length=32, null=False)
466
  name = CharField(max_length=128, null=False, help_text="KB name", index=True)
 
467
  description = TextField(null=True, help_text="KB description")
468
  embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
469
  permission = CharField(max_length=16, null=False, help_text="me|team", default="me")
 
464
  avatar = TextField(null=True, help_text="avatar base64 string")
465
  tenant_id = CharField(max_length=32, null=False)
466
  name = CharField(max_length=128, null=False, help_text="KB name", index=True)
467
+ language = CharField(max_length=32, null=True, default="Chinese", help_text="English|Chinese")
468
  description = TextField(null=True, help_text="KB description")
469
  embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
470
  permission = CharField(max_length=16, null=False, help_text="me|team", default="me")
api/db/services/llm_service.py CHANGED
@@ -57,7 +57,7 @@ class TenantLLMService(CommonService):
57
 
58
  @classmethod
59
  @DB.connection_context()
60
- def model_instance(cls, tenant_id, llm_type, llm_name=None):
61
  e, tenant = TenantService.get_by_id(tenant_id)
62
  if not e:
63
  raise LookupError("Tenant not found")
@@ -87,7 +87,7 @@ class TenantLLMService(CommonService):
87
  if model_config["llm_factory"] not in CvModel:
88
  return
89
  return CvModel[model_config["llm_factory"]](
90
- model_config["api_key"], model_config["llm_name"])
91
 
92
  if llm_type == LLMType.CHAT.value:
93
  if model_config["llm_factory"] not in ChatModel:
@@ -120,11 +120,11 @@ class TenantLLMService(CommonService):
120
 
121
 
122
  class LLMBundle(object):
123
- def __init__(self, tenant_id, llm_type, llm_name=None):
124
  self.tenant_id = tenant_id
125
  self.llm_type = llm_type
126
  self.llm_name = llm_name
127
- self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name)
128
  assert self.mdl, "Can't find mole for {}/{}/{}".format(tenant_id, llm_type, llm_name)
129
 
130
  def encode(self, texts: list, batch_size=32):
 
57
 
58
  @classmethod
59
  @DB.connection_context()
60
+ def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese"):
61
  e, tenant = TenantService.get_by_id(tenant_id)
62
  if not e:
63
  raise LookupError("Tenant not found")
 
87
  if model_config["llm_factory"] not in CvModel:
88
  return
89
  return CvModel[model_config["llm_factory"]](
90
+ model_config["api_key"], model_config["llm_name"], lang)
91
 
92
  if llm_type == LLMType.CHAT.value:
93
  if model_config["llm_factory"] not in ChatModel:
 
120
 
121
 
122
  class LLMBundle(object):
123
+ def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
124
  self.tenant_id = tenant_id
125
  self.llm_type = llm_type
126
  self.llm_name = llm_name
127
+ self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang)
128
  assert self.mdl, "Can't find mole for {}/{}/{}".format(tenant_id, llm_type, llm_name)
129
 
130
  def encode(self, texts: list, batch_size=32):
api/db/services/task_service.py CHANGED
@@ -27,7 +27,24 @@ class TaskService(CommonService):
27
  @classmethod
28
  @DB.connection_context()
29
  def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
30
- fields = [cls.model.id, cls.model.doc_id, cls.model.from_page,cls.model.to_page, Document.kb_id, Document.parser_id, Document.parser_config, Document.name, Document.type, Document.location, Document.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  docs = cls.model.select(*fields) \
32
  .join(Document, on=(cls.model.doc_id == Document.id)) \
33
  .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
@@ -42,7 +59,6 @@ class TaskService(CommonService):
42
  .paginate(1, items_per_page)
43
  return list(docs.dicts())
44
 
45
-
46
  @classmethod
47
  @DB.connection_context()
48
  def do_cancel(cls, id):
@@ -54,12 +70,11 @@ class TaskService(CommonService):
54
  pass
55
  return True
56
 
57
-
58
  @classmethod
59
  @DB.connection_context()
60
  def update_progress(cls, id, info):
61
- cls.model.update(progress_msg=cls.model.progress_msg + "\n"+info["progress_msg"]).where(
62
  cls.model.id == id).execute()
63
  if "progress" in info:
64
  cls.model.update(progress=info["progress"]).where(
65
- cls.model.id == id).execute()
 
27
  @classmethod
28
  @DB.connection_context()
29
  def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
30
+ fields = [
31
+ cls.model.id,
32
+ cls.model.doc_id,
33
+ cls.model.from_page,
34
+ cls.model.to_page,
35
+ Document.kb_id,
36
+ Document.parser_id,
37
+ Document.parser_config,
38
+ Document.name,
39
+ Document.type,
40
+ Document.location,
41
+ Document.size,
42
+ Knowledgebase.tenant_id,
43
+ Knowledgebase.language,
44
+ Tenant.embd_id,
45
+ Tenant.img2txt_id,
46
+ Tenant.asr_id,
47
+ cls.model.update_time]
48
  docs = cls.model.select(*fields) \
49
  .join(Document, on=(cls.model.doc_id == Document.id)) \
50
  .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
 
59
  .paginate(1, items_per_page)
60
  return list(docs.dicts())
61
 
 
62
  @classmethod
63
  @DB.connection_context()
64
  def do_cancel(cls, id):
 
70
  pass
71
  return True
72
 
 
73
  @classmethod
74
  @DB.connection_context()
75
  def update_progress(cls, id, info):
76
+ cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
77
  cls.model.id == id).execute()
78
  if "progress" in info:
79
  cls.model.update(progress=info["progress"]).where(
80
+ cls.model.id == id).execute()
api/flask_session/2029240f6d1128be89ddc32729463129 DELETED
Binary file (9 Bytes)
 
api/utils/file_utils.py CHANGED
@@ -167,7 +167,11 @@ def thumbnail(filename, blob):
167
  return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
168
 
169
  if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
170
- return ("data:image/%s;base64,"%filename.split(".")[-1]) + base64.b64encode(Image.open(BytesIO(blob)).thumbnail((30, 30)).tobytes()).decode("utf-8")
 
 
 
 
171
 
172
  if re.match(r".*\.(ppt|pptx)$", filename):
173
  import aspose.slides as slides
 
167
  return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
168
 
169
  if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
170
+ image = Image.open(BytesIO(blob))
171
+ image.thumbnail((30, 30))
172
+ buffered = BytesIO()
173
+ image.save(buffered, format="png")
174
+ return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
175
 
176
  if re.match(r".*\.(ppt|pptx)$", filename):
177
  import aspose.slides as slides
deepdoc/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ English | [简体中文](./README_zh.md)
2
+
3
+ # *Deep*Doc
4
+
5
+ ---
6
+
7
+ - [1. Introduction](#1)
8
+ - [2. Vision](#2)
9
+ - [3. Parser](#3)
10
+
11
+ <a name="1"></a>
12
+ ## 1. Introduction
13
+
14
+ ---
15
+ With a bunch of documents from various domains with various formats and along with diverse retrieval requirements,
16
+ an accurate analysis becomes a very challenging task. *Deep*Doc is born for that purpose.
17
+ There are 2 parts in *Deep*Doc so far: vision and parser.
18
+
19
+ <a name="2"></a>
20
+ ## 2. Vision
21
+
22
+ ---
23
+
24
+ We use visual information to solve problems the way human beings do.
25
+ - OCR. Since a lot of documents are presented as images, or can at least be converted to images,
26
+ OCR is an essential, fundamental, or even universal solution for text extraction.
27
+
28
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
29
+ <img src="https://lh6.googleusercontent.com/2xdiSjaGWkZ71YdORc71Ujf7jCHmO6G-6ONklzGiUYEh3QZpjPo6MQ9eqEFX20am_cdW4Ck0YRraXEetXWnM08kJd99yhik13Cy0_YKUAq2zVGR15LzkovRAmK9iT4o3hcJ8dTpspaJKUwt6R4gN7So" width="300"/>
30
+ </div>
31
+
32
+ - Layout recognition. Documents from different domains may have various layouts,
33
+ e.g., newspapers, magazines, books and résumés are distinct in terms of layout.
34
+ Only when the machine has an accurate layout analysis can it decide whether these text parts are successive or not,
35
+ or whether a part needs Table Structure Recognition (TSR) to process, or whether a part is a figure described by a caption.
36
+ We have 10 basic layout components which cover most cases:
37
+ - Text
38
+ - Title
39
+ - Figure
40
+ - Figure caption
41
+ - Table
42
+ - Table caption
43
+ - Header
44
+ - Footer
45
+ - Reference
46
+ - Equation
47
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
48
+ <img src="https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/ppstructure/docs/layout/layout.png?raw=true" width="900"/>
49
+ </div>
50
+
51
+ - Table Structure Recognition (TSR). A data table is a frequently used structure for presenting data, including numbers or text.
52
+ The structure of a table might be very complex, with hierarchical headers, spanning cells and projected row headers.
53
+ Along with TSR, we also reassemble the content into sentences which could be well comprehended by LLM.
54
+ We have five labels for TSR task:
55
+ - Column
56
+ - Row
57
+ - Column header
58
+ - Projected row header
59
+ - Spanning cell
60
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
61
+ <img src="https://user-images.githubusercontent.com/10793386/139559159-cd23c972-8731-48ed-91df-f3f27e9f4d79.jpg" width="900"/>
62
+ </div>
63
+
64
+ <a name="3"></a>
65
+ ## 3. Parser
66
+
67
+ ---
68
+
69
+ Four document formats (PDF, DOCX, EXCEL and PPT) have their corresponding parsers.
70
+ The most complex one is the PDF parser because of PDF's flexibility. The output of the PDF parser includes:
71
+ - Text chunks with their own positions in PDF(page number and rectangular positions).
72
+ - Tables with cropped images from the PDF, and contents which have already been translated into natural language sentences.
73
+ - Figures with caption and text in the figures.
74
+
75
+ ### Résumé
76
+
77
+ ---
78
+ The résumé is a very complicated kind of document. A résumé which is composed of unstructured text
79
+ with various layouts could be resolved into structured data composed of nearly a hundred fields.
80
+ We haven't opened the parser itself yet; for now we only open the processing method that runs after the parsing procedure.
81
+
82
+
deepdoc/README_zh.md ADDED
@@ -0,0 +1 @@
 
 
1
+ [English](./README.md) | 简体中文
deepdoc/parser/__init__.py CHANGED
@@ -1,223 +1,8 @@
1
- import random
2
 
3
  from .pdf_parser import HuParser as PdfParser
4
  from .docx_parser import HuDocxParser as DocxParser
5
  from .excel_parser import HuExcelParser as ExcelParser
6
-
7
- import re
8
-
9
- from nltk import word_tokenize
10
-
11
- from rag.nlp import stemmer, huqie
12
- from rag.utils import num_tokens_from_string
13
-
14
- BULLET_PATTERN = [[
15
- r"第[零一二三四五六七八九十百0-9]+(分?编|部分)",
16
- r"第[零一二三四五六七八九十百0-9]+章",
17
- r"第[零一二三四五六七八九十百0-9]+节",
18
- r"第[零一二三四五六七八九十百0-9]+条",
19
- r"[\((][零一二三四五六七八九十百]+[\))]",
20
- ], [
21
- r"第[0-9]+章",
22
- r"第[0-9]+节",
23
- r"[0-9]{,3}[\. 、]",
24
- r"[0-9]{,2}\.[0-9]{,2}",
25
- r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
26
- r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
27
- ], [
28
- r"第[零一二三四五六七八九十百0-9]+章",
29
- r"第[零一二三四五六七八九十百0-9]+节",
30
- r"[零一二三四五六七八九十百]+[ 、]",
31
- r"[\((][零一二三四五六七八九十百]+[\))]",
32
- r"[\((][0-9]{,2}[\))]",
33
- ], [
34
- r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)",
35
- r"Chapter (I+V?|VI*|XI|IX|X)",
36
- r"Section [0-9]+",
37
- r"Article [0-9]+"
38
- ]
39
- ]
40
-
41
- def random_choices(arr, k):
42
- k = min(len(arr), k)
43
- return random.choices(arr, k=k)
44
-
45
- def bullets_category(sections):
46
- global BULLET_PATTERN
47
- hits = [0] * len(BULLET_PATTERN)
48
- for i, pro in enumerate(BULLET_PATTERN):
49
- for sec in sections:
50
- for p in pro:
51
- if re.match(p, sec):
52
- hits[i] += 1
53
- break
54
- maxium = 0
55
- res = -1
56
- for i, h in enumerate(hits):
57
- if h <= maxium: continue
58
- res = i
59
- maxium = h
60
- return res
61
-
62
-
63
- def is_english(texts):
64
- eng = 0
65
- for t in texts:
66
- if re.match(r"[a-zA-Z]{2,}", t.strip()):
67
- eng += 1
68
- if eng / len(texts) > 0.8:
69
- return True
70
- return False
71
-
72
-
73
- def tokenize(d, t, eng):
74
- d["content_with_weight"] = t
75
- if eng:
76
- t = re.sub(r"([a-z])-([a-z])", r"\1\2", t)
77
- d["content_ltks"] = " ".join([stemmer.stem(w) for w in word_tokenize(t)])
78
- else:
79
- d["content_ltks"] = huqie.qie(t)
80
- d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
81
-
82
-
83
- def remove_contents_table(sections, eng=False):
84
- i = 0
85
- while i < len(sections):
86
- def get(i):
87
- nonlocal sections
88
- return (sections[i] if type(sections[i]) == type("") else sections[i][0]).strip()
89
-
90
- if not re.match(r"(contents|目录|目次|table of contents|致谢|acknowledge)$",
91
- re.sub(r"( | |\u3000)+", "", get(i).split("@@")[0], re.IGNORECASE)):
92
- i += 1
93
- continue
94
- sections.pop(i)
95
- if i >= len(sections): break
96
- prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2])
97
- while not prefix:
98
- sections.pop(i)
99
- if i >= len(sections): break
100
- prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2])
101
- sections.pop(i)
102
- if i >= len(sections) or not prefix: break
103
- for j in range(i, min(i + 128, len(sections))):
104
- if not re.match(prefix, get(j)):
105
- continue
106
- for _ in range(i, j): sections.pop(i)
107
- break
108
-
109
-
110
- def make_colon_as_title(sections):
111
- if not sections: return []
112
- if type(sections[0]) == type(""): return sections
113
- i = 0
114
- while i < len(sections):
115
- txt, layout = sections[i]
116
- i += 1
117
- txt = txt.split("@")[0].strip()
118
- if not txt:
119
- continue
120
- if txt[-1] not in "::":
121
- continue
122
- txt = txt[::-1]
123
- arr = re.split(r"([。?!!?;;]| .)", txt)
124
- if len(arr) < 2 or len(arr[1]) < 32:
125
- continue
126
- sections.insert(i - 1, (arr[0][::-1], "title"))
127
- i += 1
128
-
129
-
130
- def hierarchical_merge(bull, sections, depth):
131
- if not sections or bull < 0: return []
132
- if type(sections[0]) == type(""): sections = [(s, "") for s in sections]
133
- sections = [(t,o) for t, o in sections if t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())]
134
- bullets_size = len(BULLET_PATTERN[bull])
135
- levels = [[] for _ in range(bullets_size + 2)]
136
-
137
- def not_title(txt):
138
- if re.match(r"第[零一二三四五六七八九十百0-9]+条", txt): return False
139
- if len(txt) >= 128: return True
140
- return re.search(r"[,;,。;!!]", txt)
141
-
142
- for i, (txt, layout) in enumerate(sections):
143
- for j, p in enumerate(BULLET_PATTERN[bull]):
144
- if re.match(p, txt.strip()) and not not_title(txt):
145
- levels[j].append(i)
146
- break
147
- else:
148
- if re.search(r"(title|head)", layout):
149
- levels[bullets_size].append(i)
150
- else:
151
- levels[bullets_size + 1].append(i)
152
- sections = [t for t, _ in sections]
153
- for s in sections: print("--", s)
154
-
155
- def binary_search(arr, target):
156
- if not arr: return -1
157
- if target > arr[-1]: return len(arr) - 1
158
- if target < arr[0]: return -1
159
- s, e = 0, len(arr)
160
- while e - s > 1:
161
- i = (e + s) // 2
162
- if target > arr[i]:
163
- s = i
164
- continue
165
- elif target < arr[i]:
166
- e = i
167
- continue
168
- else:
169
- assert False
170
- return s
171
-
172
- cks = []
173
- readed = [False] * len(sections)
174
- levels = levels[::-1]
175
- for i, arr in enumerate(levels[:depth]):
176
- for j in arr:
177
- if readed[j]: continue
178
- readed[j] = True
179
- cks.append([j])
180
- if i + 1 == len(levels) - 1: continue
181
- for ii in range(i + 1, len(levels)):
182
- jj = binary_search(levels[ii], j)
183
- if jj < 0: continue
184
- if jj > cks[-1][-1]: cks[-1].pop(-1)
185
- cks[-1].append(levels[ii][jj])
186
- for ii in cks[-1]: readed[ii] = True
187
- for i in range(len(cks)):
188
- cks[i] = [sections[j] for j in cks[i][::-1]]
189
- print("--------------\n", "\n* ".join(cks[i]))
190
-
191
- return cks
192
-
193
-
194
- def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
195
- if not sections: return []
196
- if type(sections[0]) == type(""): sections = [(s, "") for s in sections]
197
- cks = [""]
198
- tk_nums = [0]
199
- def add_chunk(t, pos):
200
- nonlocal cks, tk_nums, delimiter
201
- tnum = num_tokens_from_string(t)
202
- if tnum < 8: pos = ""
203
- if tk_nums[-1] > chunk_token_num:
204
- cks.append(t + pos)
205
- tk_nums.append(tnum)
206
- else:
207
- cks[-1] += t + pos
208
- tk_nums[-1] += tnum
209
-
210
- for sec, pos in sections:
211
- s, e = 0, 1
212
- while e < len(sec):
213
- if sec[e] in delimiter:
214
- add_chunk(sec[s: e+1], pos)
215
- s = e + 1
216
- e = s + 1
217
- else:
218
- e += 1
219
- if s < e: add_chunk(sec[s: e], pos)
220
-
221
- return cks
222
 
223
 
 
1
+
2
 
3
  from .pdf_parser import HuParser as PdfParser
4
  from .docx_parser import HuDocxParser as DocxParser
5
  from .excel_parser import HuExcelParser as ExcelParser
6
+ from .ppt_parser import HuPptParser as PptParser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
deepdoc/parser/ppt_parser.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+ #
13
+ from io import BytesIO
14
+ from pptx import Presentation
15
+
16
+
17
class HuPptParser(object):
    """Extract the text of each slide of a PowerPoint presentation.

    Calling an instance returns one string per selected slide; the texts of
    the shapes on a slide are joined with newlines.
    """

    def __init__(self):
        super().__init__()

    def __extract(self, shape):
        """Return the text carried by ``shape``, or ``None`` if it has none.

        Tables are flattened row by row into ``"header: value; ..."`` lines
        (row 0 is used as the header row), group shapes are walked
        recursively, and any other shape contributes its text frame.
        """
        # 19 is the value of python-pptx's MSO_SHAPE_TYPE.TABLE.
        if shape.shape_type == 19:
            tb = shape.table
            rows = []
            for i in range(1, len(tb.rows)):
                rows.append("; ".join(
                    tb.cell(0, j).text + ": " + tb.cell(i, j).text
                    for j in range(len(tb.columns)) if tb.cell(i, j)))
            return "\n".join(rows)

        if shape.has_text_frame:
            return shape.text_frame.text

        # 6 is the value of python-pptx's MSO_SHAPE_TYPE.GROUP.
        if shape.shape_type == 6:
            texts = []
            for p in shape.shapes:
                t = self.__extract(p)
                if t:
                    texts.append(t)
            return "\n".join(texts)

    def __call__(self, fnm, from_page, to_page, callback=None):
        """Return the text of slides ``from_page`` (inclusive) to ``to_page`` (exclusive).

        Args:
            fnm: path of the presentation (``str``) or its raw bytes.
            from_page: zero-based index of the first slide to read.
            to_page: zero-based index one past the last slide to read.
            callback: accepted for interface compatibility; not used here.

        Returns:
            A list with one newline-joined string per selected slide.
            ``self.total_page`` is set to the total number of slides.
        """
        ppt = Presentation(fnm) if isinstance(fnm, str) else Presentation(BytesIO(fnm))
        txts = []
        self.total_page = len(ppt.slides)
        # The python-pptx ``Slides`` collection supports iteration and integer
        # indexing but not slicing, so select the page range while iterating.
        for i, slide in enumerate(ppt.slides):
            if i < from_page:
                continue
            if i >= to_page:
                break
            texts = []
            for shape in slide.shapes:
                txt = self.__extract(shape)
                if txt:
                    texts.append(txt)
            txts.append("\n".join(texts))

        return txts
deepdoc/parser/resume/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+
3
+
4
def refactor(cv):
    """Normalize a parsed résumé dict in place and return it.

    The function:
      * drops raw/diagnostic parser fields that hold a value;
      * makes sure ``cv["basic"]`` and ``cv["contact"]`` are dicts;
      * converts every list/dict section (education, work, ...) into a dict
        keyed by the stringified position ``"0"``, ``"1"``, ... and removes
        sections that are neither list nor dict;
      * renames a couple of salary fields inside ``basic``;
      * copies summary fields from the latest work / education entry
        (ordered by ``start_time``) into ``basic``.

    Sections, ``basic`` and ``contact`` that are explicitly ``None`` are
    treated as missing instead of raising.

    Args:
        cv: the résumé dict produced by the parser; it is modified in place.

    Returns:
        The same ``cv`` dict.
    """
    # Drop raw/diagnostic fields, but only when they actually hold a value.
    for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]:
        if cv.get(n) is not None:
            del cv[n]
    cv["is_deleted"] = 0
    if cv.get("basic") is None:
        cv["basic"] = {}
    if cv["basic"].get("photo2"):
        del cv["basic"]["photo2"]

    for n in ["education", "work", "certificate", "project", "language", "skill", "training"]:
        if cv.get(n) is None:
            continue
        if isinstance(cv[n], dict):
            cv[n] = list(cv[n].values())
        if not isinstance(cv[n], list):
            del cv[n]
            continue
        vv = []
        for v in cv[n]:
            if "external" in v and v["external"] is not None:
                del v["external"]
            vv.append(v)
        cv[n] = {str(i): v for i, v in enumerate(vv)}

    basics = [
        ("basic_salary_month", "salary_month"),
        ("expect_annual_salary_from", "expect_annual_salary"),
    ]
    for n, t in basics:
        if cv["basic"].get(n):
            cv["basic"][t] = cv["basic"][n]
            del cv["basic"][n]

    def _sorted_entries(section):
        # A section may be absent or None (None sections are skipped above),
        # and an entry may carry start_time=None; neither may break sorting.
        entries = list((cv.get(section) or {}).values())
        return sorted(
            entries,
            key=lambda x: "" if x.get("start_time") is None else x["start_time"])

    work = _sorted_entries("work")
    edu = _sorted_entries("education")

    if work:
        cv["basic"]["work_start_time"] = work[0].get("start_time", "")
        cv["basic"]["management_experience"] = 'Y' if any(
            w.get("management_experience", '') == 'Y' for w in work) else 'N'
        cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")

        # Summary fields are taken from the most recent work entry.
        for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities",
                  "corporation_type", "scale", "corporation_name"]:
            cv["basic"][n] = work[-1].get(n, "")

    if edu:
        for n in ["school_name", "discipline_name"]:
            if n in edu[-1]:
                cv["basic"][n] = edu[-1][n]

    cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if cv.get("contact") is None:
        cv["contact"] = {}
    if not cv["contact"].get("name"):
        cv["contact"]["name"] = cv["basic"].get("name", "")
    return cv
deepdoc/parser/resume/entities/__init__.py ADDED
File without changes
deepdoc/parser/resume/entities/corporations.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re,json,os
2
+ import pandas as pd
3
+ from rag.nlp import huqie
4
+ from . import regions
5
# Directory of this module; the resource files live in its "res" sub-directory.
current_file_path = os.path.dirname(os.path.abspath(__file__))


def _load_json(name):
    """Load the JSON resource ``res/<name>`` next to this module.

    The file is opened in a ``with`` block so the handle is closed promptly,
    and decoded as UTF-8 rather than with the platform default encoding.
    """
    with open(os.path.join(current_file_path, "res", name), "r", encoding="utf-8") as f:
        return json.load(f)


# Tab-separated table indexed by corporation id ("cid"); missing values become 0.
GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
GOODS["cid"] = GOODS["cid"].astype(str)
GOODS = GOODS.set_index(["cid"])
CORP_TKS = _load_json("corp.tks.freq.json")
GOOD_CORP = _load_json("good_corp.json")
CORP_TAG = _load_json("corp_tag.json")
12
+
13
def baike(cid, default_v=0):
    """Look up the ``"len"`` column of ``GOODS`` for corporation ``cid``.

    ``cid`` is converted to ``str`` before the lookup. If the lookup fails
    for any reason, ``default_v`` is returned instead.
    """
    try:
        found = GOODS.loc[str(cid), "len"]
    except Exception:
        return default_v
    return found
20
+
21
+
22
def corpNorm(nm, add_region=True):
    """Normalize a corporation name to a short canonical form.

    The name is passed through ``huqie.strQ2B`` / ``huqie.tradi2simp``
    (presumably full-width-to-half-width and traditional-to-simplified
    conversion; confirm against ``rag.nlp.huqie``), lower-cased, and stripped
    of bracket/punctuation noise, legal-form suffixes ("co.", "inc.",
    "ltd", ...) and generic Chinese suffixes. It is then tokenized; region
    tokens and tokens listed in ``CORP_TKS`` are dropped.

    Args:
        nm: the raw corporation name. Falsy or non-string input yields "".
        add_region: when true and a region token was found in the name, the
            first such token is appended as ``"(region)"``.

    Returns:
        The normalized name (possibly an empty string).
    """
    if not nm or not isinstance(nm, str):
        return ""
    nm = huqie.tradi2simp(huqie.strQ2B(nm)).lower()
    nm = re.sub(r"&amp;", "&", nm)
    nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
    # count/flags are passed by keyword: positional use is deprecated and
    # makes it easy to pass a flag where the count is expected.
    nm = re.sub(r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, count=10000, flags=re.IGNORECASE)
    nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, count=10000, flags=re.IGNORECASE)
    # Very short names are returned as-is unless they start with a region name.
    if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])):
        return nm

    tks = huqie.qie(nm).split(" ")
    # Region tokens found in the name; a leading "中国" does not count.
    reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
    nm = ""
    for t in tks:
        if regions.isName(t) or t in CORP_TKS:
            continue
        # Keep a space between two consecutive alphanumeric tokens.
        if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):
            nm += " "
        nm += t

    # Mixed names: keep the non-latin head, or else the latin head.
    r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
    if r:
        nm = r.group(1)
    r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
    if r:
        nm = r.group(1)
    return nm.strip() + (("" if not reg else "(%s)" % reg[0]) if add_region else "")
45
+
46
+
47
def rmNoise(n):
    """Remove bracketed segments, then separator punctuation, from ``n``.

    First every non-nested parenthesized segment is deleted; afterwards
    commas, dots, spaces, ampersands and any remaining parentheses are
    stripped out.
    """
    without_brackets = re.sub(r"[\((][^()()]+[))]", "", n)
    return re.sub(r"[,. &()()]+", "", without_brackets)
51
+
52
# Normalize the reference corporation names once at import time so that
# lookups compare normalized names against normalized names.
GOOD_CORP = {corpNorm(rmNoise(c), False) for c in GOOD_CORP}
# Report tag entries whose name normalizes to an empty string.
for c in CORP_TAG:
    if not corpNorm(rmNoise(c), False):
        print(c)
CORP_TAG = {corpNorm(rmNoise(c), False): v for c, v in CORP_TAG.items()}
57
+
58
def is_good(nm):
    """Tell whether corporation name ``nm`` matches an entry of ``GOOD_CORP``.

    Names containing "外派" are never considered good. Otherwise the name is
    normalized with ``rmNoise`` and ``corpNorm`` and compared with every
    entry: purely alphanumeric entries must match exactly, any other entry
    matches when it occurs as a substring of the normalized name.

    Args:
        nm: the raw corporation name.

    Returns:
        ``True`` if the name matches an entry, ``False`` otherwise.
    """
    if nm.find("外派") >= 0:
        return False
    nm = corpNorm(rmNoise(nm), False)
    for n in GOOD_CORP:
        if not n:
            # An entry that normalized to "" is a substring of every name and
            # would make every corporation "good"; ignore it.
            continue
        if re.match(r"[0-9a-zA-Z]+$", n):
            if n == nm:
                return True
        elif nm.find(n) >= 0:
            return True
    return False
68
+
69
def corp_tag(nm):
    """Return the tags recorded in ``CORP_TAG`` for corporation name ``nm``.

    The name is normalized with ``rmNoise`` and ``corpNorm``. Keys made only
    of ASCII letters, digits, dots, commas and spaces must match exactly;
    other keys match as substrings, except that a key shorter than 3
    characters is ignored when the name is at least twice as long.

    Args:
        nm: the raw corporation name.

    Returns:
        The value stored for the first matching key, or ``[]`` if none matches.
    """
    nm = corpNorm(rmNoise(nm), False)
    for n, tags in CORP_TAG.items():
        if not n:
            # A key that normalized to "" would match every name and divide
            # by zero in the length-ratio check below; ignore it.
            continue
        if re.match(r"[0-9a-zA-Z., ]+$", n):
            if n == nm:
                return tags
        elif nm.find(n) >= 0:
            if len(n) < 3 and len(nm) / len(n) >= 2:
                continue
            return tags
    return []
80
+
deepdoc/parser/resume/entities/degrees.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Degree id (as a string) -> degree name.
TBL = {
    "94": "EMBA",
    "6": "MBA",
    "95": "MPA",
    "92": "专升本",
    "4": "专科",
    "90": "中专",
    "91": "中技",
    "86": "初中",
    "3": "博士",
    "10": "博士后",
    "1": "本科",
    "2": "硕士",
    "87": "职高",
    "89": "高中",
}

# Reverse mapping: degree name -> degree id.
TBL_ = {v: k for k, v in TBL.items()}


def get_name(id):
    """Return the degree name for ``id`` (int or str), or "" if unknown."""
    return TBL.get(str(id), "")


def get_id(nm):
    """Return the degree id for name ``nm``, or "" if unknown or empty.

    The name is upper-cased and stripped before the lookup. Non-string input
    is converted with ``str()`` first, mirroring ``get_name``, instead of
    raising ``AttributeError``.
    """
    if not nm:
        return ""
    return TBL_.get(str(nm).upper().strip(), "")
deepdoc/parser/resume/entities/industries.py ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ TBL = {"1":{"name":"IT/通信/电子","parent":"0"},
3
+ "2":{"name":"互联网","parent":"0"},
4
+ "3":{"name":"电子商务","parent":"2"},
5
+ "4":{"name":"互联网金融","parent":"2"},
6
+ "5":{"name":"网络游戏","parent":"2"},
7
+ "6":{"name":"社交网络平台","parent":"2"},
8
+ "7":{"name":"视频音乐","parent":"2"},
9
+ "9":{"name":"安全","parent":"2"},
10
+ "10":{"name":"云计算","parent":"2"},
11
+ "12":{"name":"工具类客户端应用","parent":"2"},
12
+ "13":{"name":"互联网广告","parent":"2"},
13
+ "14":{"name":"企业互联网服务","parent":"2"},
14
+ "16":{"name":"在线教育","parent":"2"},
15
+ "17":{"name":"在线医疗","parent":"2"},
16
+ "19":{"name":"B2B","parent":"3"},
17
+ "20":{"name":"B2C","parent":"3"},
18
+ "21":{"name":"C2C","parent":"3"},
19
+ "22":{"name":"生活信息本地化","parent":"3"},
20
+ "23":{"name":"在线旅游","parent":"2"},
21
+ "24":{"name":"第三方支付","parent":"4"},
22
+ "26":{"name":"客户端游戏","parent":"5"},
23
+ "27":{"name":"网页游戏","parent":"5"},
24
+ "28":{"name":"手机游戏","parent":"5"},
25
+ "29":{"name":"微博","parent":"6"},
26
+ "30":{"name":"社交网站","parent":"6"},
27
+ "31":{"name":"在线视频","parent":"7"},
28
+ "32":{"name":"在线音乐","parent":"7"},
29
+ "35":{"name":"企业安全","parent":"9"},
30
+ "36":{"name":"个人安全","parent":"9"},
31
+ "37":{"name":"企业级云服务","parent":"10"},
32
+ "38":{"name":"个人级云服务","parent":"10"},
33
+ "43":{"name":"输入法","parent":"12"},
34
+ "44":{"name":"浏览器","parent":"12"},
35
+ "45":{"name":"词典","parent":"12"},
36
+ "46":{"name":"播放器","parent":"12"},
37
+ "47":{"name":"下载器","parent":"12"},
38
+ "48":{"name":"IM","parent":"12"},
39
+ "49":{"name":"广告服务","parent":"13"},
40
+ "50":{"name":"第三方广告网络平台","parent":"13"},
41
+ "51":{"name":"媒体代理","parent":"13"},
42
+ "52":{"name":"创意代理","parent":"13"},
43
+ "53":{"name":"IT-综合","parent":"1"},
44
+ "71":{"name":"团购","parent":"3"},
45
+ "72":{"name":"地图","parent":"2"},
46
+ "73":{"name":"数据存储","parent":"2"},
47
+ "414":{"name":"计算机软件","parent":"1"},
48
+ "415":{"name":"计算机硬件","parent":"1"},
49
+ "416":{"name":"计算机服务(系统、数据服务、维修)","parent":"1"},
50
+ "417":{"name":"通信/电信/网络设备","parent":"1"},
51
+ "418":{"name":"通信/电信运营、增值服务","parent":"1"},
52
+ "419":{"name":"电子技术/半导体/集成电路","parent":"1"},
53
+ "472":{"name":"P2P网贷","parent":"4"},
54
+ "473":{"name":"互联网理财","parent":"4"},
55
+ "474":{"name":"婚恋","parent":"6"},
56
+ "476":{"name":"虚拟化","parent":"10"},
57
+ "477":{"name":"邮箱","parent":"12"},
58
+ "478":{"name":"商业智能","parent":"14"},
59
+ "479":{"name":"企业建站","parent":"14"},
60
+ "480":{"name":"安防","parent":"14"},
61
+ "481":{"name":"网络营销","parent":"2"},
62
+ "487":{"name":"智能终端","parent":"2"},
63
+ "488":{"name":"移动互联网","parent":"2"},
64
+ "489":{"name":"数字城市","parent":"2"},
65
+ "490":{"name":"大数据","parent":"2"},
66
+ "491":{"name":"互联网人力资源","parent":"2"},
67
+ "492":{"name":"舆情监控","parent":"2"},
68
+ "493":{"name":"移动营销","parent":"481"},
69
+ "494":{"name":"微博营销","parent":"481"},
70
+ "495":{"name":"精准营销","parent":"481"},
71
+ "496":{"name":"海外营销","parent":"481"},
72
+ "497":{"name":"微信营销","parent":"481"},
73
+ "498":{"name":"智能手机","parent":"487"},
74
+ "499":{"name":"可穿戴设备","parent":"487"},
75
+ "500":{"name":"智能电视","parent":"487"},
76
+ "501":{"name":"WAP","parent":"488"},
77
+ "502":{"name":"物联网","parent":"489"},
78
+ "503":{"name":"O2O","parent":"489"},
79
+ "504":{"name":"数字出版","parent":"489"},
80
+ "505":{"name":"搜索","parent":"2"},
81
+ "506":{"name":"垂直搜索","parent":"505"},
82
+ "507":{"name":"无线搜索","parent":"505"},
83
+ "508":{"name":"网页搜索","parent":"505"},
84
+ "509":{"name":"网址导航","parent":"2"},
85
+ "510":{"name":"门户","parent":"2"},
86
+ "511":{"name":"网络文学","parent":"2"},
87
+ "512":{"name":"自媒体","parent":"2"},
88
+ "513":{"name":"金融","parent":"0"},
89
+ "514":{"name":"建筑与房地产","parent":"0"},
90
+ "515":{"name":"专业服务","parent":"0"},
91
+ "516":{"name":"教育培训","parent":"0"},
92
+ "517":{"name":"文化传媒","parent":"0"},
93
+ "518":{"name":"消费品","parent":"0"},
94
+ "519":{"name":"工业","parent":"0"},
95
+ "520":{"name":"交通物流","parent":"0"},
96
+ "521":{"name":"贸易","parent":"0"},
97
+ "522":{"name":"医药","parent":"0"},
98
+ "523":{"name":"医疗器械","parent":"522"},
99
+ "524":{"name":"保健品","parent":"518"},
100
+ "525":{"name":"服务业","parent":"0"},
101
+ "526":{"name":"能源/矿产/环保","parent":"0"},
102
+ "527":{"name":"化工","parent":"0"},
103
+ "528":{"name":"政府","parent":"0"},
104
+ "529":{"name":"公共事业","parent":"0"},
105
+ "530":{"name":"非盈利机构","parent":"0"},
106
+ "531":{"name":"农业","parent":"1131"},
107
+ "532":{"name":"林业","parent":"1131"},
108
+ "533":{"name":"畜牧业","parent":"1131"},
109
+ "534":{"name":"渔业","parent":"1131"},
110
+ "535":{"name":"学术科研","parent":"0"},
111
+ "536":{"name":"零售","parent":"0"},
112
+ "537":{"name":"银行","parent":"513"},
113
+ "538":{"name":"保险","parent":"513"},
114
+ "539":{"name":"证券","parent":"513"},
115
+ "540":{"name":"基金","parent":"513"},
116
+ "541":{"name":"信托","parent":"513"},
117
+ "542":{"name":"担保","parent":"513"},
118
+ "543":{"name":"典当","parent":"513"},
119
+ "544":{"name":"拍卖","parent":"513"},
120
+ "545":{"name":"投资/融资","parent":"513"},
121
+ "546":{"name":"期货","parent":"513"},
122
+ "547":{"name":"房地产开发","parent":"514"},
123
+ "548":{"name":"工程施工","parent":"514"},
124
+ "549":{"name":"建筑设计","parent":"514"},
125
+ "550":{"name":"房地产代理","parent":"514"},
126
+ "551":{"name":"物业管理","parent":"514"},
127
+ "552":{"name":"室内设计","parent":"514"},
128
+ "553":{"name":"装修装潢","parent":"514"},
129
+ "554":{"name":"市政工程","parent":"514"},
130
+ "555":{"name":"工程造价","parent":"514"},
131
+ "556":{"name":"工程监理","parent":"514"},
132
+ "557":{"name":"环境工程","parent":"514"},
133
+ "558":{"name":"园林景观","parent":"514"},
134
+ "559":{"name":"法律","parent":"515"},
135
+ "560":{"name":"人力资源","parent":"515"},
136
+ "561":{"name":"会计","parent":"1125"},
137
+ "562":{"name":"审计","parent":"515"},
138
+ "563":{"name":"检测认证","parent":"515"},
139
+ "565":{"name":"翻译","parent":"515"},
140
+ "566":{"name":"中介","parent":"515"},
141
+ "567":{"name":"咨询","parent":"515"},
142
+ "568":{"name":"外包服务","parent":"515"},
143
+ "569":{"name":"家教","parent":"516"},
144
+ "570":{"name":"早教","parent":"516"},
145
+ "571":{"name":"职业技能培训","parent":"516"},
146
+ "572":{"name":"外语培训","parent":"516"},
147
+ "573":{"name":"设计培训","parent":"516"},
148
+ "574":{"name":"IT培训","parent":"516"},
149
+ "575":{"name":"文艺体育培训","parent":"516"},
150
+ "576":{"name":"学历教育","parent":"516"},
151
+ "577":{"name":"管理培训","parent":"516"},
152
+ "578":{"name":"民办基础教育","parent":"516"},
153
+ "579":{"name":"广告","parent":"517"},
154
+ "580":{"name":"媒体","parent":"517"},
155
+ "581":{"name":"会展","parent":"517"},
156
+ "582":{"name":"公关","parent":"517"},
157
+ "583":{"name":"影视","parent":"517"},
158
+ "584":{"name":"艺术","parent":"517"},
159
+ "585":{"name":"文化传播","parent":"517"},
160
+ "586":{"name":"娱乐","parent":"517"},
161
+ "587":{"name":"体育","parent":"517"},
162
+ "588":{"name":"出版","parent":"517"},
163
+ "589":{"name":"休闲","parent":"517"},
164
+ "590":{"name":"动漫","parent":"517"},
165
+ "591":{"name":"市场推广","parent":"517"},
166
+ "592":{"name":"市场研究","parent":"517"},
167
+ "593":{"name":"食品","parent":"1129"},
168
+ "594":{"name":"饮料","parent":"1129"},
169
+ "595":{"name":"烟草","parent":"1129"},
170
+ "596":{"name":"酒品","parent":"518"},
171
+ "597":{"name":"服饰","parent":"518"},
172
+ "598":{"name":"纺织","parent":"518"},
173
+ "599":{"name":"化妆品","parent":"1129"},
174
+ "600":{"name":"日用品","parent":"1129"},
175
+ "601":{"name":"家电","parent":"518"},
176
+ "602":{"name":"家具","parent":"518"},
177
+ "603":{"name":"办公用品","parent":"518"},
178
+ "604":{"name":"奢侈品","parent":"518"},
179
+ "605":{"name":"珠宝","parent":"518"},
180
+ "606":{"name":"数码产品","parent":"518"},
181
+ "607":{"name":"玩具","parent":"518"},
182
+ "608":{"name":"图书","parent":"518"},
183
+ "609":{"name":"音像","parent":"518"},
184
+ "610":{"name":"钟表","parent":"518"},
185
+ "611":{"name":"箱包","parent":"518"},
186
+ "612":{"name":"母婴","parent":"518"},
187
+ "613":{"name":"营养保健","parent":"518"},
188
+ "614":{"name":"户外用品","parent":"518"},
189
+ "615":{"name":"健身器材","parent":"518"},
190
+ "616":{"name":"乐器","parent":"518"},
191
+ "617":{"name":"汽车用品","parent":"518"},
192
+ "619":{"name":"厨具","parent":"518"},
193
+ "620":{"name":"机械制造","parent":"519"},
194
+ "621":{"name":"流体控制","parent":"519"},
195
+ "622":{"name":"自动化控制","parent":"519"},
196
+ "623":{"name":"仪器仪表","parent":"519"},
197
+ "624":{"name":"航空/航天","parent":"519"},
198
+ "625":{"name":"交通设施","parent":"519"},
199
+ "626":{"name":"工业电子","parent":"519"},
200
+ "627":{"name":"建材","parent":"519"},
201
+ "628":{"name":"五金材料","parent":"519"},
202
+ "629":{"name":"汽车","parent":"519"},
203
+ "630":{"name":"印刷","parent":"519"},
204
+ "631":{"name":"造纸","parent":"519"},
205
+ "632":{"name":"包装","parent":"519"},
206
+ "633":{"name":"原材料及加工","parent":"519"},
207
+ "634":{"name":"物流","parent":"520"},
208
+ "635":{"name":"仓储","parent":"520"},
209
+ "636":{"name":"客运","parent":"520"},
210
+ "637":{"name":"快递","parent":"520"},
211
+ "638":{"name":"化学药","parent":"522"},
212
+ "639":{"name":"中药","parent":"522"},
213
+ "640":{"name":"生物制药","parent":"522"},
214
+ "641":{"name":"兽药","parent":"522"},
215
+ "642":{"name":"农药","parent":"522"},
216
+ "643":{"name":"CRO","parent":"522"},
217
+ "644":{"name":"消毒","parent":"522"},
218
+ "645":{"name":"医药商业","parent":"522"},
219
+ "646":{"name":"医疗服务","parent":"522"},
220
+ "647":{"name":"医疗器械","parent":"523"},
221
+ "648":{"name":"制药设备","parent":"523"},
222
+ "649":{"name":"医用耗材","parent":"523"},
223
+ "650":{"name":"手术器械","parent":"523"},
224
+ "651":{"name":"保健器材","parent":"524"},
225
+ "652":{"name":"性保健品","parent":"524"},
226
+ "653":{"name":"医药保养","parent":"524"},
227
+ "654":{"name":"医用保健","parent":"524"},
228
+ "655":{"name":"酒店","parent":"525"},
229
+ "656":{"name":"餐饮","parent":"525"},
230
+ "657":{"name":"旅游","parent":"525"},
231
+ "658":{"name":"生活服务","parent":"525"},
232
+ "659":{"name":"保健服务","parent":"525"},
233
+ "660":{"name":"运动健身","parent":"525"},
234
+ "661":{"name":"家政服务","parent":"525"},
235
+ "662":{"name":"婚庆服务","parent":"525"},
236
+ "663":{"name":"租赁服务","parent":"525"},
237
+ "664":{"name":"维修服务","parent":"525"},
238
+ "665":{"name":"石油天然气","parent":"526"},
239
+ "666":{"name":"电力","parent":"526"},
240
+ "667":{"name":"新能源","parent":"526"},
241
+ "668":{"name":"水利","parent":"526"},
242
+ "669":{"name":"矿产","parent":"526"},
243
+ "670":{"name":"采掘业","parent":"526"},
244
+ "671":{"name":"冶炼","parent":"526"},
245
+ "672":{"name":"环保","parent":"526"},
246
+ "673":{"name":"无机化工原料","parent":"527"},
247
+ "674":{"name":"有机化工原料","parent":"527"},
248
+ "675":{"name":"精细化学品","parent":"527"},
249
+ "676":{"name":"化工设备","parent":"527"},
250
+ "677":{"name":"化工工程","parent":"527"},
251
+ "678":{"name":"资产管理","parent":"513"},
252
+ "679":{"name":"金融租赁","parent":"513"},
253
+ "680":{"name":"征信及信评机构","parent":"513"},
254
+ "681":{"name":"资产评估机构","parent":"513"},
255
+ "683":{"name":"金融监管机构","parent":"513"},
256
+ "684":{"name":"国际贸易","parent":"521"},
257
+ "685":{"name":"海关","parent":"521"},
258
+ "686":{"name":"购物中心","parent":"536"},
259
+ "687":{"name":"超市","parent":"536"},
260
+ "688":{"name":"便利店","parent":"536"},
261
+ "689":{"name":"专卖店","parent":"536"},
262
+ "690":{"name":"专业店","parent":"536"},
263
+ "691":{"name":"百货店","parent":"536"},
264
+ "692":{"name":"杂货店","parent":"536"},
265
+ "693":{"name":"个人银行","parent":"537"},
266
+ "695":{"name":"私人银行","parent":"537"},
267
+ "696":{"name":"公司银行","parent":"537"},
268
+ "697":{"name":"投资银行","parent":"537"},
269
+ "698":{"name":"政策性银行","parent":"537"},
270
+ "699":{"name":"中央银行","parent":"537"},
271
+ "700":{"name":"人寿险","parent":"538"},
272
+ "701":{"name":"财产险","parent":"538"},
273
+ "702":{"name":"再保险","parent":"538"},
274
+ "703":{"name":"养老险","parent":"538"},
275
+ "704":{"name":"保险代理公司","parent":"538"},
276
+ "705":{"name":"公募基金","parent":"540"},
277
+ "707":{"name":"私募基金","parent":"540"},
278
+ "708":{"name":"第三方理财","parent":"679"},
279
+ "709":{"name":"资产管理公司","parent":"679"},
280
+ "711":{"name":"房产中介","parent":"566"},
281
+ "712":{"name":"职业中介","parent":"566"},
282
+ "713":{"name":"婚姻中介","parent":"566"},
283
+ "714":{"name":"战略咨询","parent":"567"},
284
+ "715":{"name":"投资咨询","parent":"567"},
285
+ "716":{"name":"心理咨询","parent":"567"},
286
+ "717":{"name":"留学移民咨询","parent":"567"},
287
+ "718":{"name":"工商注册代理","parent":"568"},
288
+ "719":{"name":"商标专利代理","parent":"568"},
289
+ "720":{"name":"财务代理","parent":"568"},
290
+ "721":{"name":"工程机械","parent":"620"},
291
+ "722":{"name":"农业机械","parent":"620"},
292
+ "723":{"name":"海工设备","parent":"620"},
293
+ "724":{"name":"包装机械","parent":"620"},
294
+ "725":{"name":"印刷机械","parent":"620"},
295
+ "726":{"name":"数控机床","parent":"620"},
296
+ "727":{"name":"矿山机械","parent":"620"},
297
+ "728":{"name":"水泵","parent":"621"},
298
+ "729":{"name":"管道","parent":"621"},
299
+ "730":{"name":"阀门","parent":"621"},
300
+ "732":{"name":"压缩机","parent":"621"},
301
+ "733":{"name":"集散控制系统","parent":"622"},
302
+ "734":{"name":"远程控制","parent":"622"},
303
+ "735":{"name":"液压系统","parent":"622"},
304
+ "736":{"name":"楼宇智能化","parent":"622"},
305
+ "737":{"name":"飞机制造","parent":"624"},
306
+ "738":{"name":"航空公司","parent":"624"},
307
+ "739":{"name":"发动机","parent":"624"},
308
+ "740":{"name":"复合材料","parent":"624"},
309
+ "741":{"name":"高铁","parent":"625"},
310
+ "742":{"name":"地铁","parent":"625"},
311
+ "743":{"name":"信号传输","parent":"625"},
312
+ "745":{"name":"结构材料","parent":"627"},
313
+ "746":{"name":"装饰材料","parent":"627"},
314
+ "747":{"name":"专用材料","parent":"627"},
315
+ "749":{"name":"经销商集团","parent":"629"},
316
+ "750":{"name":"整车制造","parent":"629"},
317
+ "751":{"name":"汽车零配件","parent":"629"},
318
+ "752":{"name":"外型设计","parent":"629"},
319
+ "753":{"name":"平版印刷","parent":"630"},
320
+ "754":{"name":"凸版印刷","parent":"630"},
321
+ "755":{"name":"凹版印刷","parent":"630"},
322
+ "756":{"name":"孔版印刷","parent":"630"},
323
+ "757":{"name":"印刷用纸","parent":"631"},
324
+ "758":{"name":"书写、制图及复制用纸","parent":"631"},
325
+ "759":{"name":"包装用纸","parent":"631"},
326
+ "760":{"name":"生活、卫生及装饰用纸","parent":"631"},
327
+ "761":{"name":"技术用纸","parent":"631"},
328
+ "762":{"name":"加工纸原纸","parent":"631"},
329
+ "763":{"name":"食品包装","parent":"632"},
330
+ "764":{"name":"医药包装","parent":"632"},
331
+ "765":{"name":"日化包装","parent":"632"},
332
+ "766":{"name":"物流包装","parent":"632"},
333
+ "767":{"name":"礼品包装","parent":"632"},
334
+ "768":{"name":"电子五金包装","parent":"632"},
335
+ "769":{"name":"汽车服务","parent":"525"},
336
+ "770":{"name":"汽车保养","parent":"769"},
337
+ "771":{"name":"租车","parent":"769"},
338
+ "773":{"name":"出租车","parent":"769"},
339
+ "774":{"name":"代驾","parent":"769"},
340
+ "775":{"name":"发电","parent":"666"},
341
+ "777":{"name":"输配电","parent":"666"},
342
+ "779":{"name":"风电","parent":"667"},
343
+ "780":{"name":"光伏/太阳能","parent":"667"},
344
+ "781":{"name":"生物质发电","parent":"667"},
345
+ "782":{"name":"煤化工","parent":"667"},
346
+ "783":{"name":"垃圾发电","parent":"667"},
347
+ "784":{"name":"核电","parent":"667"},
348
+ "785":{"name":"能源矿产","parent":"669"},
349
+ "786":{"name":"金属矿产","parent":"669"},
350
+ "787":{"name":"非金属矿产","parent":"669"},
351
+ "788":{"name":"水气矿产","parent":"669"},
352
+ "789":{"name":"锅炉","parent":"775"},
353
+ "790":{"name":"发电机","parent":"775"},
354
+ "791":{"name":"汽轮机","parent":"775"},
355
+ "792":{"name":"燃机","parent":"775"},
356
+ "793":{"name":"冷却","parent":"775"},
357
+ "794":{"name":"电力设计院","parent":"775"},
358
+ "795":{"name":"高压输配电","parent":"777"},
359
+ "796":{"name":"中压输配电","parent":"777"},
360
+ "797":{"name":"低压输配电","parent":"777"},
361
+ "798":{"name":"继电保护","parent":"777"},
362
+ "799":{"name":"智能电网","parent":"777"},
363
+ "800":{"name":"小学","parent":"516"},
364
+ "801":{"name":"电动车","parent":"519"},
365
+ "802":{"name":"皮具箱包","parent":"518"},
366
+ "803":{"name":"医药制造","parent":"522"},
367
+ "804":{"name":"电器销售","parent":"536"},
368
+ "805":{"name":"塑料制品","parent":"527"},
369
+ "806":{"name":"公益基金会","parent":"530"},
370
+ "807":{"name":"美发服务","parent":"525"},
371
+ "808":{"name":"农业养殖","parent":"531"},
372
+ "809":{"name":"金融服务","parent":"513"},
373
+ "810":{"name":"商业地产综合体","parent":"514"},
374
+ "811":{"name":"美容服务","parent":"525"},
375
+ "812":{"name":"灯饰","parent":"518"},
376
+ "813":{"name":"油墨颜料产品","parent":"527"},
377
+ "814":{"name":"眼镜制造","parent":"518"},
378
+ "815":{"name":"农业生物技术","parent":"531"},
379
+ "816":{"name":"体育用品","parent":"518"},
380
+ "817":{"name":"保健用品","parent":"524"},
381
+ "818":{"name":"化学化工产品","parent":"527"},
382
+ "819":{"name":"饲料","parent":"531"},
383
+ "821":{"name":"保安服务","parent":"525"},
384
+ "822":{"name":"干细胞技术","parent":"522"},
385
+ "824":{"name":"农药化肥","parent":"527"},
386
+ "825":{"name":"卫生洁具","parent":"518"},
387
+ "826":{"name":"体育器材、场馆","parent":"518"},
388
+ "827":{"name":"饲料加工","parent":"531"},
389
+ "828":{"name":"测绘服务","parent":"529"},
390
+ "830":{"name":"金属船舶制造","parent":"519"},
391
+ "831":{"name":"基因工程","parent":"522"},
392
+ "832":{"name":"花卉服务","parent":"536"},
393
+ "833":{"name":"农业种植","parent":"531"},
394
+ "834":{"name":"皮革制品","parent":"518"},
395
+ "835":{"name":"地理信息加工服务","parent":"529"},
396
+ "836":{"name":"机器人","parent":"519"},
397
+ "837":{"name":"礼品","parent":"518"},
398
+ "838":{"name":"理发及美容服务","parent":"525"},
399
+ "839":{"name":"其他清洁服务","parent":"525"},
400
+ "840":{"name":"硅胶材料","parent":"527"},
401
+ "841":{"name":"茶叶销售","parent":"518"},
402
+ "842":{"name":"彩票活动","parent":"529"},
403
+ "843":{"name":"化妆培训","parent":"516"},
404
+ "844":{"name":"鞋业","parent":"518"},
405
+ "845":{"name":"酒店用品","parent":"518"},
406
+ "846":{"name":"复合材料","parent":"527"},
407
+ "847":{"name":"房地产工程建设","parent":"548"},
408
+ "848":{"name":"知识产权服务","parent":"559"},
409
+ "849":{"name":"新型建材","parent":"627"},
410
+ "850":{"name":"企业投资咨询","parent":"567"},
411
+ "851":{"name":"含乳饮料和植物蛋白饮料制造","parent":"594"},
412
+ "852":{"name":"汽车检测设备","parent":"629"},
413
+ "853":{"name":"手机通讯器材","parent":"417"},
414
+ "854":{"name":"环保材料","parent":"672"},
415
+ "855":{"name":"交通设施","parent":"554"},
416
+ "856":{"name":"电子器件","parent":"419"},
417
+ "857":{"name":"啤酒","parent":"594"},
418
+ "858":{"name":"生态旅游","parent":"657"},
419
+ "859":{"name":"自动化设备","parent":"626"},
420
+ "860":{"name":"软件开发","parent":"414"},
421
+ "861":{"name":"葡萄酒销售","parent":"594"},
422
+ "862":{"name":"钢材","parent":"633"},
423
+ "863":{"name":"餐饮培训","parent":"656"},
424
+ "864":{"name":"速冻食品","parent":"593"},
425
+ "865":{"name":"空气环保","parent":"672"},
426
+ "866":{"name":"互联网房地产经纪服务","parent":"550"},
427
+ "867":{"name":"食品添加剂","parent":"593"},
428
+ "868":{"name":"演艺传播","parent":"585"},
429
+ "869":{"name":"信用卡","parent":"537"},
430
+ "870":{"name":"报纸期刊广告","parent":"579"},
431
+ "871":{"name":"摄影","parent":"525"},
432
+ "872":{"name":"手机软件","parent":"414"},
433
+ "873":{"name":"地坪建材","parent":"627"},
434
+ "874":{"name":"企业管理咨询","parent":"567"},
435
+ "875":{"name":"幼儿教育","parent":"570"},
436
+ "876":{"name":"系统集成","parent":"416"},
437
+ "877":{"name":"皮革服饰","parent":"597"},
438
+ "878":{"name":"保健食品","parent":"593"},
439
+ "879":{"name":"叉车","parent":"620"},
440
+ "880":{"name":"厨卫电器","parent":"601"},
441
+ "882":{"name":"地暖设备","parent":"627"},
442
+ "883":{"name":"钢结构制造","parent":"548"},
443
+ "884":{"name":"投影机","parent":"606"},
444
+ "885":{"name":"啤酒销售","parent":"594"},
445
+ "886":{"name":"度假村旅游","parent":"657"},
446
+ "887":{"name":"电力元件设备","parent":"626"},
447
+ "888":{"name":"管理软件","parent":"414"},
448
+ "889":{"name":"轴承","parent":"628"},
449
+ "890":{"name":"餐饮设备","parent":"656"},
450
+ "891":{"name":"肉制品及副产品加工","parent":"593"},
451
+ "892":{"name":"艺术收藏品投资交易","parent":"584"},
452
+ "893":{"name":"净水器","parent":"601"},
453
+ "894":{"name":"进口食品","parent":"593"},
454
+ "895":{"name":"娱乐文化传播","parent":"585"},
455
+ "896":{"name":"文化传播","parent":"585"},
456
+ "897":{"name":"商旅传媒","parent":"580"},
457
+ "898":{"name":"广告设计制作","parent":"579"},
458
+ "899":{"name":"金属丝绳及其制品制造","parent":"627"},
459
+ "900":{"name":"建筑涂料","parent":"627"},
460
+ "901":{"name":"抵押贷款","parent":"543"},
461
+ "902":{"name":"早教","parent":"570"},
462
+ "903":{"name":"电影放映","parent":"583"},
463
+ "904":{"name":"内衣服饰","parent":"597"},
464
+ "905":{"name":"无线网络通信","parent":"418"},
465
+ "906":{"name":"记忆卡","parent":"415"},
466
+ "907":{"name":"女装服饰","parent":"597"},
467
+ "908":{"name":"建筑机械","parent":"620"},
468
+ "909":{"name":"制冷电器","parent":"601"},
469
+ "910":{"name":"通信设备","parent":"417"},
470
+ "911":{"name":"空调设备","parent":"601"},
471
+ "912":{"name":"建筑装饰","parent":"553"},
472
+ "913":{"name":"办公设备","parent":"603"},
473
+ "916":{"name":"数据处理软件","parent":"414"},
474
+ "917":{"name":"葡萄酒贸易","parent":"594"},
475
+ "918":{"name":"通讯器材","parent":"417"},
476
+ "919":{"name":"铜业","parent":"633"},
477
+ "920":{"name":"食堂","parent":"656"},
478
+ "921":{"name":"糖果零食","parent":"593"},
479
+ "922":{"name":"文化艺术传播","parent":"584"},
480
+ "923":{"name":"太阳能电器","parent":"601"},
481
+ "924":{"name":"药品零售","parent":"645"},
482
+ "925":{"name":"果蔬食品","parent":"593"},
483
+ "926":{"name":"文化活动策划","parent":"585"},
484
+ "928":{"name":"汽车广告","parent":"657"},
485
+ "929":{"name":"条码设备","parent":"630"},
486
+ "930":{"name":"建筑石材","parent":"627"},
487
+ "931":{"name":"贵金属","parent":"545"},
488
+ "932":{"name":"体育","parent":"660"},
489
+ "933":{"name":"金融信息服务","parent":"414"},
490
+ "934":{"name":"玻璃建材","parent":"627"},
491
+ "935":{"name":"家教","parent":"569"},
492
+ "936":{"name":"歌舞厅娱乐活动","parent":"586"},
493
+ "937":{"name":"计算机服务器","parent":"415"},
494
+ "938":{"name":"管道","parent":"627"},
495
+ "939":{"name":"婴幼儿服饰","parent":"597"},
496
+ "940":{"name":"热水器","parent":"601"},
497
+ "941":{"name":"计算机及零部件制造","parent":"415"},
498
+ "942":{"name":"钢铁贸易","parent":"633"},
499
+ "944":{"name":"包装材料","parent":"632"},
500
+ "945":{"name":"计算机办公设备","parent":"603"},
501
+ "946":{"name":"白酒","parent":"594"},
502
+ "948":{"name":"发动机","parent":"620"},
503
+ "949":{"name":"快餐服务","parent":"656"},
504
+ "950":{"name":"酒类销售","parent":"594"},
505
+ "951":{"name":"电子产品、机电设备","parent":"626"},
506
+ "952":{"name":"激光设备","parent":"626"},
507
+ "953":{"name":"餐饮策划","parent":"656"},
508
+ "954":{"name":"饮料、食品","parent":"594"},
509
+ "955":{"name":"文化娱乐经纪","parent":"585"},
510
+ "956":{"name":"天然气","parent":"665"},
511
+ "957":{"name":"农副食品","parent":"593"},
512
+ "958":{"name":"艺术表演","parent":"585"},
513
+ "959":{"name":"石膏、水泥制品及类似制品制造","parent":"627"},
514
+ "960":{"name":"橱柜","parent":"602"},
515
+ "961":{"name":"管理培训","parent":"577"},
516
+ "962":{"name":"男装服饰","parent":"597"},
517
+ "963":{"name":"化肥制造","parent":"675"},
518
+ "964":{"name":"童装服饰","parent":"597"},
519
+ "965":{"name":"电源电池","parent":"626"},
520
+ "966":{"name":"家电维修","parent":"664"},
521
+ "967":{"name":"光电子器件","parent":"419"},
522
+ "968":{"name":"旅行社服务","parent":"657"},
523
+ "969":{"name":"电线、电缆制造","parent":"626"},
524
+ "970":{"name":"软件开发、信息系统集成","parent":"419"},
525
+ "971":{"name":"白酒制造","parent":"594"},
526
+ "973":{"name":"甜品服务","parent":"656"},
527
+ "974":{"name":"糕点、面包制造","parent":"593"},
528
+ "975":{"name":"木工机械","parent":"620"},
529
+ "976":{"name":"酒吧服务","parent":"656"},
530
+ "977":{"name":"火腿肠","parent":"593"},
531
+ "978":{"name":"广告策划推广","parent":"579"},
532
+ "979":{"name":"新能源产品和生产装备制造","parent":"667"},
533
+ "980":{"name":"调味品","parent":"593"},
534
+ "981":{"name":"礼仪表演","parent":"585"},
535
+ "982":{"name":"劳务派遣","parent":"560"},
536
+ "983":{"name":"建材零售","parent":"627"},
537
+ "984":{"name":"商品交易中心","parent":"545"},
538
+ "985":{"name":"体育推广","parent":"585"},
539
+ "986":{"name":"茶饮料及其他饮料制造","parent":"594"},
540
+ "987":{"name":"金属建材","parent":"627"},
541
+ "988":{"name":"职业技能培训","parent":"571"},
542
+ "989":{"name":"网吧活动","parent":"586"},
543
+ "990":{"name":"洗衣服务","parent":"658"},
544
+ "991":{"name":"管道工程","parent":"554"},
545
+ "992":{"name":"通信工程","parent":"417"},
546
+ "993":{"name":"电子元器件","parent":"626"},
547
+ "994":{"name":"电子设备","parent":"419"},
548
+ "995":{"name":"茶馆服务","parent":"656"},
549
+ "996":{"name":"旅游开发","parent":"657"},
550
+ "997":{"name":"视频通讯","parent":"417"},
551
+ "998":{"name":"白酒销售","parent":"594"},
552
+ "1000":{"name":"咖啡馆服务","parent":"656"},
553
+ "1001":{"name":"食品零售","parent":"593"},
554
+ "1002":{"name":"健康疗养旅游","parent":"655"},
555
+ "1003":{"name":"粮油食品","parent":"593"},
556
+ "1004":{"name":"儿童教育影视","parent":"583"},
557
+ "1005":{"name":"新能源发电","parent":"667"},
558
+ "1006":{"name":"旅游策划","parent":"657"},
559
+ "1007":{"name":"绘画","parent":"575"},
560
+ "1008":{"name":"方便面及其他方便食品","parent":"593"},
561
+ "1009":{"name":"房地产经纪","parent":"550"},
562
+ "1010":{"name":"母婴家政","parent":"661"},
563
+ "1011":{"name":"居家养老健康服务","parent":"661"},
564
+ "1012":{"name":"文化艺术投资","parent":"545"},
565
+ "1013":{"name":"运动健身","parent":"660"},
566
+ "1014":{"name":"瓶(罐)装饮用水制造","parent":"594"},
567
+ "1015":{"name":"金属门窗","parent":"627"},
568
+ "1016":{"name":"机动车检测","parent":"563"},
569
+ "1017":{"name":"货物运输","parent":"634"},
570
+ "1018":{"name":"服饰专卖","parent":"690"},
571
+ "1019":{"name":"酒店服装","parent":"597"},
572
+ "1020":{"name":"通讯软件","parent":"417"},
573
+ "1021":{"name":"消防工程","parent":"554"},
574
+ "1022":{"name":"嵌入式电子系统","parent":"419"},
575
+ "1023":{"name":"航空票务","parent":"636"},
576
+ "1024":{"name":"电气设备","parent":"626"},
577
+ "1025":{"name":"酒业贸易","parent":"594"},
578
+ "1027":{"name":"其他饮料及冷饮服务","parent":"656"},
579
+ "1028":{"name":"乳制品","parent":"593"},
580
+ "1029":{"name":"新闻期刊出版","parent":"588"},
581
+ "1030":{"name":"水污染治理","parent":"672"},
582
+ "1031":{"name":"谷物食品","parent":"593"},
583
+ "1032":{"name":"数字动漫设计制造服务","parent":"590"},
584
+ "1033":{"name":"医院","parent":"646"},
585
+ "1034":{"name":"旅游广告","parent":"657"},
586
+ "1035":{"name":"办公家具","parent":"602"},
587
+ "1036":{"name":"房地产营销策划","parent":"550"},
588
+ "1037":{"name":"保洁家政","parent":"661"},
589
+ "1038":{"name":"水泥制造","parent":"627"},
590
+ "1039":{"name":"市场研究咨询","parent":"567"},
591
+ "1040":{"name":"驾校","parent":"571"},
592
+ "1041":{"name":"正餐服务","parent":"656"},
593
+ "1043":{"name":"机动车燃油","parent":"665"},
594
+ "1044":{"name":"食品","parent":"593"},
595
+ "1045":{"name":"新能源汽车","parent":"629"},
596
+ "1046":{"name":"手机无线网络推广","parent":"417"},
597
+ "1047":{"name":"环保设备","parent":"672"},
598
+ "1048":{"name":"通讯工程","parent":"418"},
599
+ "1049":{"name":"半导体集成电路","parent":"419"},
600
+ "1050":{"name":"航空服务","parent":"636"},
601
+ "1051":{"name":"电机设备","parent":"626"},
602
+ "1052":{"name":"档案软件","parent":"414"},
603
+ "1053":{"name":"冷链物流服务","parent":"634"},
604
+ "1054":{"name":"小吃服务","parent":"656"},
605
+ "1055":{"name":"水产品加工","parent":"593"},
606
+ "1056":{"name":"图书出版","parent":"588"},
607
+ "1057":{"name":"固体废物治理","parent":"672"},
608
+ "1059":{"name":"坚果食品","parent":"593"},
609
+ "1060":{"name":"广告传媒","parent":"579"},
610
+ "1061":{"name":"电梯","parent":"622"},
611
+ "1062":{"name":"社区医疗与卫生院","parent":"646"},
612
+ "1063":{"name":"广告、印刷包装","parent":"630"},
613
+ "1064":{"name":"婚纱礼服","parent":"662"},
614
+ "1065":{"name":"地毯","parent":"602"},
615
+ "1066":{"name":"互联网物业","parent":"551"},
616
+ "1067":{"name":"跨境电商","parent":"3"},
617
+ "1068":{"name":"信息安全、系统集成","parent":"9"},
618
+ "1069":{"name":"专用汽车制造","parent":"750"},
619
+ "1070":{"name":"商品贸易","parent":"3"},
620
+ "1071":{"name":"墙壁装饰材料","parent":"746"},
621
+ "1072":{"name":"窗帘装饰材料","parent":"746"},
622
+ "1073":{"name":"电子商务、本地生活服务","parent":"3"},
623
+ "1075":{"name":"白酒电子商务","parent":"3"},
624
+ "1076":{"name":"商品贸易、电子商务","parent":"3"},
625
+ "1077":{"name":"木质装饰材料","parent":"746"},
626
+ "1078":{"name":"电子商务、汽车电商交易平台","parent":"3"},
627
+ "1079":{"name":"汽车轮胎","parent":"751"},
628
+ "1080":{"name":"气体压缩机械制造","parent":"732"},
629
+ "1081":{"name":"家装家具电子商务","parent":"3"},
630
+ "1082":{"name":"化妆品电子商务","parent":"3"},
631
+ "1083":{"name":"汽车销售","parent":"749"},
632
+ "1084":{"name":"新闻资讯网站","parent":"510"},
633
+ "1085":{"name":"母婴电商","parent":"3"},
634
+ "1086":{"name":"电商商务、收藏品交易","parent":"3"},
635
+ "1088":{"name":"电子商务、数码产品","parent":"3"},
636
+ "1089":{"name":"二手车交易","parent":"749"},
637
+ "1090":{"name":"游戏制作服务","parent":"5"},
638
+ "1091":{"name":"母婴服务","parent":"510"},
639
+ "1092":{"name":"家具电子商务","parent":"3"},
640
+ "1093":{"name":"汽车配件电子商务","parent":"3"},
641
+ "1094":{"name":"输配电设备","parent":"777"},
642
+ "1095":{"name":"矿山设备","parent":"727"},
643
+ "1096":{"name":"机床机械","parent":"726"},
644
+ "1097":{"name":"农产品电商","parent":"3"},
645
+ "1098":{"name":"陶瓷装饰材料","parent":"746"},
646
+ "1099":{"name":"车载联网设备","parent":"487"},
647
+ "1100":{"name":"汽车销售电子商务","parent":"3"},
648
+ "1101":{"name":"石油设备","parent":"730"},
649
+ "1102":{"name":"智能家居","parent":"487"},
650
+ "1103":{"name":"散热器","parent":"751"},
651
+ "1104":{"name":"电力工程","parent":"775"},
652
+ "1105":{"name":"生鲜电商","parent":"3"},
653
+ "1106":{"name":"互联网数据服务","parent":"490"},
654
+ "1107":{"name":"房车、商务车销售","parent":"749"},
655
+ "1108":{"name":"茶叶电子商务","parent":"3"},
656
+ "1109":{"name":"酒类电子商务","parent":"3"},
657
+ "1110":{"name":"阀门","parent":"730"},
658
+ "1111":{"name":"食品电商","parent":"3"},
659
+ "1112":{"name":"儿童摄影","parent":"871"},
660
+ "1113":{"name":"广告摄影","parent":"871"},
661
+ "1114":{"name":"婚纱摄影","parent":"871"},
662
+ "1115":{"name":"模具制造","parent":"620"},
663
+ "1116":{"name":"汽车模具","parent":"629"},
664
+ "1117":{"name":"认证咨询","parent":"567"},
665
+ "1118":{"name":"数字视觉制作服务","parent":"590"},
666
+ "1119":{"name":"牙科及医疗器械","parent":"646"},
667
+ "1120":{"name":"猎头招聘","parent":"560"},
668
+ "1121":{"name":"家居","parent":"518"},
669
+ "1122":{"name":"收藏品","parent":"518"},
670
+ "1123":{"name":"首饰","parent":"518"},
671
+ "1124":{"name":"工艺品","parent":"518"},
672
+ "1125":{"name":"财务","parent":"515"},
673
+ "1126":{"name":"税务","parent":"515"},
674
+ "1127":{"name":"分类信息","parent":"2"},
675
+ "1128":{"name":"宠物","parent":"0"},
676
+ "1129":{"name":"快消品","parent":"518"},
677
+ "1130":{"name":"人工智能","parent":"2"},
678
+ "1131":{"name":"农/林/牧/渔","parent":"0"}
679
+ }
680
+
681
def get_names(id):
    """Return the names of an industry and all of its ancestors.

    Starting from ``id`` (converted with ``str``), the function follows
    the ``"parent"`` links in ``TBL`` and collects each entry's
    ``"name"``, most specific first. The walk stops at the first id that
    has no (or an empty) entry in ``TBL``; an unknown starting id yields
    ``[]``.

    The walk is iterative and remembers visited ids, so a cyclic parent
    chain terminates instead of exhausting the recursion limit.
    """
    names = []
    visited = set()
    key = str(id)
    while key not in visited:
        visited.add(key)
        entry = TBL.get(key)
        if not entry:
            break
        names.append(entry["name"])
        key = str(entry["parent"])
    return names
690
+
691
if __name__ == "__main__":
    # Manual smoke check: print the name chain for one sample id.
    sample_id = "1119"
    print(get_names(sample_id))
deepdoc/parser/resume/entities/regions.py ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TBL = {
2
+ "2":{"name":"北京","parent":"1"},
3
+ "3":{"name":"天津","parent":"1"},
4
+ "4":{"name":"河北","parent":"1"},
5
+ "5":{"name":"山西","parent":"1"},
6
+ "6":{"name":"内蒙古","parent":"1"},
7
+ "7":{"name":"辽宁","parent":"1"},
8
+ "8":{"name":"吉林","parent":"1"},
9
+ "9":{"name":"黑龙江","parent":"1"},
10
+ "10":{"name":"上海","parent":"1"},
11
+ "11":{"name":"江苏","parent":"1"},
12
+ "12":{"name":"浙江","parent":"1"},
13
+ "13":{"name":"安徽","parent":"1"},
14
+ "14":{"name":"福建","parent":"1"},
15
+ "15":{"name":"江西","parent":"1"},
16
+ "16":{"name":"山东","parent":"1"},
17
+ "17":{"name":"河南","parent":"1"},
18
+ "18":{"name":"湖北","parent":"1"},
19
+ "19":{"name":"湖南","parent":"1"},
20
+ "20":{"name":"广东","parent":"1"},
21
+ "21":{"name":"广西","parent":"1"},
22
+ "22":{"name":"海南","parent":"1"},
23
+ "23":{"name":"重庆","parent":"1"},
24
+ "24":{"name":"四川","parent":"1"},
25
+ "25":{"name":"贵州","parent":"1"},
26
+ "26":{"name":"云南","parent":"1"},
27
+ "27":{"name":"西藏","parent":"1"},
28
+ "28":{"name":"陕西","parent":"1"},
29
+ "29":{"name":"甘肃","parent":"1"},
30
+ "30":{"name":"青海","parent":"1"},
31
+ "31":{"name":"宁夏","parent":"1"},
32
+ "32":{"name":"新疆","parent":"1"},
33
+ "33":{"name":"北京市","parent":"2"},
34
+ "34":{"name":"天津市","parent":"3"},
35
+ "35":{"name":"石家庄市","parent":"4"},
36
+ "36":{"name":"唐山市","parent":"4"},
37
+ "37":{"name":"秦皇岛市","parent":"4"},
38
+ "38":{"name":"邯郸市","parent":"4"},
39
+ "39":{"name":"邢台市","parent":"4"},
40
+ "40":{"name":"保定市","parent":"4"},
41
+ "41":{"name":"张家口市","parent":"4"},
42
+ "42":{"name":"承德市","parent":"4"},
43
+ "43":{"name":"沧州市","parent":"4"},
44
+ "44":{"name":"廊坊市","parent":"4"},
45
+ "45":{"name":"衡水市","parent":"4"},
46
+ "46":{"name":"太原市","parent":"5"},
47
+ "47":{"name":"大同市","parent":"5"},
48
+ "48":{"name":"阳泉市","parent":"5"},
49
+ "49":{"name":"长治市","parent":"5"},
50
+ "50":{"name":"晋城市","parent":"5"},
51
+ "51":{"name":"朔州市","parent":"5"},
52
+ "52":{"name":"晋中市","parent":"5"},
53
+ "53":{"name":"运城市","parent":"5"},
54
+ "54":{"name":"忻州市","parent":"5"},
55
+ "55":{"name":"临汾市","parent":"5"},
56
+ "56":{"name":"吕梁市","parent":"5"},
57
+ "57":{"name":"呼和浩特市","parent":"6"},
58
+ "58":{"name":"包头市","parent":"6"},
59
+ "59":{"name":"乌海市","parent":"6"},
60
+ "60":{"name":"赤峰市","parent":"6"},
61
+ "61":{"name":"通辽市","parent":"6"},
62
+ "62":{"name":"鄂尔多斯市","parent":"6"},
63
+ "63":{"name":"呼伦贝尔市","parent":"6"},
64
+ "64":{"name":"巴彦淖尔市","parent":"6"},
65
+ "65":{"name":"乌兰察布市","parent":"6"},
66
+ "66":{"name":"兴安盟","parent":"6"},
67
+ "67":{"name":"锡林郭勒盟","parent":"6"},
68
+ "68":{"name":"阿拉善盟","parent":"6"},
69
+ "69":{"name":"沈阳市","parent":"7"},
70
+ "70":{"name":"大连市","parent":"7"},
71
+ "71":{"name":"鞍山市","parent":"7"},
72
+ "72":{"name":"抚顺市","parent":"7"},
73
+ "73":{"name":"本溪市","parent":"7"},
74
+ "74":{"name":"丹东市","parent":"7"},
75
+ "75":{"name":"锦州市","parent":"7"},
76
+ "76":{"name":"营口市","parent":"7"},
77
+ "77":{"name":"阜新市","parent":"7"},
78
+ "78":{"name":"辽阳市","parent":"7"},
79
+ "79":{"name":"盘锦市","parent":"7"},
80
+ "80":{"name":"铁岭市","parent":"7"},
81
+ "81":{"name":"朝阳市","parent":"7"},
82
+ "82":{"name":"葫芦岛市","parent":"7"},
83
+ "83":{"name":"长春市","parent":"8"},
84
+ "84":{"name":"吉林市","parent":"8"},
85
+ "85":{"name":"四平市","parent":"8"},
86
+ "86":{"name":"辽源市","parent":"8"},
87
+ "87":{"name":"通化市","parent":"8"},
88
+ "88":{"name":"白山市","parent":"8"},
89
+ "89":{"name":"松原市","parent":"8"},
90
+ "90":{"name":"白城市","parent":"8"},
91
+ "91":{"name":"延边朝鲜族自治州","parent":"8"},
92
+ "92":{"name":"哈尔滨市","parent":"9"},
93
+ "93":{"name":"齐齐哈尔市","parent":"9"},
94
+ "94":{"name":"鸡西市","parent":"9"},
95
+ "95":{"name":"鹤岗市","parent":"9"},
96
+ "96":{"name":"双鸭山市","parent":"9"},
97
+ "97":{"name":"大庆市","parent":"9"},
98
+ "98":{"name":"伊春市","parent":"9"},
99
+ "99":{"name":"佳木斯市","parent":"9"},
100
+ "100":{"name":"七台河市","parent":"9"},
101
+ "101":{"name":"牡丹江市","parent":"9"},
102
+ "102":{"name":"黑河市","parent":"9"},
103
+ "103":{"name":"绥化市","parent":"9"},
104
+ "104":{"name":"大兴安岭地区","parent":"9"},
105
+ "105":{"name":"上海市","parent":"10"},
106
+ "106":{"name":"南京市","parent":"11"},
107
+ "107":{"name":"无锡市","parent":"11"},
108
+ "108":{"name":"徐州市","parent":"11"},
109
+ "109":{"name":"常州市","parent":"11"},
110
+ "110":{"name":"苏州市","parent":"11"},
111
+ "111":{"name":"南通市","parent":"11"},
112
+ "112":{"name":"连云港市","parent":"11"},
113
+ "113":{"name":"淮安市","parent":"11"},
114
+ "114":{"name":"盐城市","parent":"11"},
115
+ "115":{"name":"扬州市","parent":"11"},
116
+ "116":{"name":"镇江市","parent":"11"},
117
+ "117":{"name":"泰州市","parent":"11"},
118
+ "118":{"name":"宿迁市","parent":"11"},
119
+ "119":{"name":"杭州市","parent":"12"},
120
+ "120":{"name":"宁波市","parent":"12"},
121
+ "121":{"name":"温州市","parent":"12"},
122
+ "122":{"name":"嘉兴市","parent":"12"},
123
+ "123":{"name":"湖州市","parent":"12"},
124
+ "124":{"name":"绍兴市","parent":"12"},
125
+ "125":{"name":"金华市","parent":"12"},
126
+ "126":{"name":"衢州市","parent":"12"},
127
+ "127":{"name":"舟山市","parent":"12"},
128
+ "128":{"name":"台州市","parent":"12"},
129
+ "129":{"name":"丽水市","parent":"12"},
130
+ "130":{"name":"合肥市","parent":"13"},
131
+ "131":{"name":"芜湖市","parent":"13"},
132
+ "132":{"name":"蚌埠市","parent":"13"},
133
+ "133":{"name":"淮南市","parent":"13"},
134
+ "134":{"name":"马鞍山市","parent":"13"},
135
+ "135":{"name":"淮北市","parent":"13"},
136
+ "136":{"name":"铜陵市","parent":"13"},
137
+ "137":{"name":"安庆市","parent":"13"},
138
+ "138":{"name":"黄山市","parent":"13"},
139
+ "139":{"name":"滁州市","parent":"13"},
140
+ "140":{"name":"阜阳市","parent":"13"},
141
+ "141":{"name":"宿州市","parent":"13"},
142
+ "143":{"name":"六安市","parent":"13"},
143
+ "144":{"name":"亳州市","parent":"13"},
144
+ "145":{"name":"池州市","parent":"13"},
145
+ "146":{"name":"宣城市","parent":"13"},
146
+ "147":{"name":"福州市","parent":"14"},
147
+ "148":{"name":"厦门市","parent":"14"},
148
+ "149":{"name":"莆田市","parent":"14"},
149
+ "150":{"name":"三明市","parent":"14"},
150
+ "151":{"name":"泉州市","parent":"14"},
151
+ "152":{"name":"漳州市","parent":"14"},
152
+ "153":{"name":"南平市","parent":"14"},
153
+ "154":{"name":"龙岩市","parent":"14"},
154
+ "155":{"name":"宁德市","parent":"14"},
155
+ "156":{"name":"南昌市","parent":"15"},
156
+ "157":{"name":"景德镇市","parent":"15"},
157
+ "158":{"name":"萍乡市","parent":"15"},
158
+ "159":{"name":"九江市","parent":"15"},
159
+ "160":{"name":"新余市","parent":"15"},
160
+ "161":{"name":"鹰潭市","parent":"15"},
161
+ "162":{"name":"赣州市","parent":"15"},
162
+ "163":{"name":"吉安市","parent":"15"},
163
+ "164":{"name":"宜春市","parent":"15"},
164
+ "165":{"name":"抚州市","parent":"15"},
165
+ "166":{"name":"上饶市","parent":"15"},
166
+ "167":{"name":"济南市","parent":"16"},
167
+ "168":{"name":"青岛市","parent":"16"},
168
+ "169":{"name":"淄博市","parent":"16"},
169
+ "170":{"name":"枣庄市","parent":"16"},
170
+ "171":{"name":"东营市","parent":"16"},
171
+ "172":{"name":"烟台市","parent":"16"},
172
+ "173":{"name":"潍坊市","parent":"16"},
173
+ "174":{"name":"济宁市","parent":"16"},
174
+ "175":{"name":"泰安市","parent":"16"},
175
+ "176":{"name":"威海市","parent":"16"},
176
+ "177":{"name":"日照市","parent":"16"},
177
+ "179":{"name":"临沂市","parent":"16"},
178
+ "180":{"name":"德州市","parent":"16"},
179
+ "181":{"name":"聊城市","parent":"16"},
180
+ "182":{"name":"滨州市","parent":"16"},
181
+ "183":{"name":"菏泽市","parent":"16"},
182
+ "184":{"name":"郑州市","parent":"17"},
183
+ "185":{"name":"开封市","parent":"17"},
184
+ "186":{"name":"洛阳市","parent":"17"},
185
+ "187":{"name":"平顶山市","parent":"17"},
186
+ "188":{"name":"安阳市","parent":"17"},
187
+ "189":{"name":"鹤壁市","parent":"17"},
188
+ "190":{"name":"新乡市","parent":"17"},
189
+ "191":{"name":"焦作市","parent":"17"},
190
+ "192":{"name":"濮阳市","parent":"17"},
191
+ "193":{"name":"许昌市","parent":"17"},
192
+ "194":{"name":"漯河市","parent":"17"},
193
+ "195":{"name":"三门峡市","parent":"17"},
194
+ "196":{"name":"南阳市","parent":"17"},
195
+ "197":{"name":"商丘市","parent":"17"},
196
+ "198":{"name":"信阳市","parent":"17"},
197
+ "199":{"name":"周口市","parent":"17"},
198
+ "200":{"name":"驻马店市","parent":"17"},
199
+ "201":{"name":"武汉市","parent":"18"},
200
+ "202":{"name":"黄石市","parent":"18"},
201
+ "203":{"name":"十堰市","parent":"18"},
202
+ "204":{"name":"宜昌市","parent":"18"},
203
+ "205":{"name":"襄阳市","parent":"18"},
204
+ "206":{"name":"鄂州市","parent":"18"},
205
+ "207":{"name":"荆门市","parent":"18"},
206
+ "208":{"name":"孝感市","parent":"18"},
207
+ "209":{"name":"荆州市","parent":"18"},
208
+ "210":{"name":"黄冈市","parent":"18"},
209
+ "211":{"name":"咸宁市","parent":"18"},
210
+ "212":{"name":"随州市","parent":"18"},
211
+ "213":{"name":"恩施土家族苗族自治州","parent":"18"},
212
+ "215":{"name":"长沙市","parent":"19"},
213
+ "216":{"name":"株洲市","parent":"19"},
214
+ "217":{"name":"湘潭市","parent":"19"},
215
+ "218":{"name":"衡阳市","parent":"19"},
216
+ "219":{"name":"邵阳市","parent":"19"},
217
+ "220":{"name":"岳阳市","parent":"19"},
218
+ "221":{"name":"常德市","parent":"19"},
219
+ "222":{"name":"张家界市","parent":"19"},
220
+ "223":{"name":"益阳市","parent":"19"},
221
+ "224":{"name":"郴州市","parent":"19"},
222
+ "225":{"name":"永州市","parent":"19"},
223
+ "226":{"name":"怀化市","parent":"19"},
224
+ "227":{"name":"娄底市","parent":"19"},
225
+ "228":{"name":"湘西土家族苗族自治州","parent":"19"},
226
+ "229":{"name":"广州市","parent":"20"},
227
+ "230":{"name":"韶关市","parent":"20"},
228
+ "231":{"name":"深圳市","parent":"20"},
229
+ "232":{"name":"珠海市","parent":"20"},
230
+ "233":{"name":"汕头市","parent":"20"},
231
+ "234":{"name":"佛山市","parent":"20"},
232
+ "235":{"name":"江门市","parent":"20"},
233
+ "236":{"name":"湛江市","parent":"20"},
234
+ "237":{"name":"茂名市","parent":"20"},
235
+ "238":{"name":"肇庆市","parent":"20"},
236
+ "239":{"name":"惠州市","parent":"20"},
237
+ "240":{"name":"梅州市","parent":"20"},
238
+ "241":{"name":"汕尾市","parent":"20"},
239
+ "242":{"name":"河源市","parent":"20"},
240
+ "243":{"name":"阳江市","parent":"20"},
241
+ "244":{"name":"清远市","parent":"20"},
242
+ "245":{"name":"东莞市","parent":"20"},
243
+ "246":{"name":"中山市","parent":"20"},
244
+ "247":{"name":"潮州市","parent":"20"},
245
+ "248":{"name":"揭阳市","parent":"20"},
246
+ "249":{"name":"云浮市","parent":"20"},
247
+ "250":{"name":"南宁市","parent":"21"},
248
+ "251":{"name":"柳州市","parent":"21"},
249
+ "252":{"name":"桂林市","parent":"21"},
250
+ "253":{"name":"梧州市","parent":"21"},
251
+ "254":{"name":"北海市","parent":"21"},
252
+ "255":{"name":"防城港市","parent":"21"},
253
+ "256":{"name":"钦州市","parent":"21"},
254
+ "257":{"name":"贵港市","parent":"21"},
255
+ "258":{"name":"玉林市","parent":"21"},
256
+ "259":{"name":"百色市","parent":"21"},
257
+ "260":{"name":"贺州市","parent":"21"},
258
+ "261":{"name":"河池市","parent":"21"},
259
+ "262":{"name":"来宾市","parent":"21"},
260
+ "263":{"name":"崇左市","parent":"21"},
261
+ "264":{"name":"海口市","parent":"22"},
262
+ "265":{"name":"三亚市","parent":"22"},
263
+ "267":{"name":"重庆市","parent":"23"},
264
+ "268":{"name":"成都市","parent":"24"},
265
+ "269":{"name":"自贡市","parent":"24"},
266
+ "270":{"name":"攀枝花市","parent":"24"},
267
+ "271":{"name":"泸州市","parent":"24"},
268
+ "272":{"name":"德阳市","parent":"24"},
269
+ "273":{"name":"绵阳市","parent":"24"},
270
+ "274":{"name":"广元市","parent":"24"},
271
+ "275":{"name":"遂宁市","parent":"24"},
272
+ "276":{"name":"内江市","parent":"24"},
273
+ "277":{"name":"乐山市","parent":"24"},
274
+ "278":{"name":"南充市","parent":"24"},
275
+ "279":{"name":"眉山市","parent":"24"},
276
+ "280":{"name":"宜宾市","parent":"24"},
277
+ "281":{"name":"广安市","parent":"24"},
278
+ "282":{"name":"达州市","parent":"24"},
279
+ "283":{"name":"雅安市","parent":"24"},
280
+ "284":{"name":"巴中市","parent":"24"},
281
+ "285":{"name":"资阳市","parent":"24"},
282
+ "286":{"name":"阿坝藏族羌族自治州","parent":"24"},
283
+ "287":{"name":"甘孜藏族自治州","parent":"24"},
284
+ "288":{"name":"凉山彝族自治州","parent":"24"},
285
+ "289":{"name":"贵阳市","parent":"25"},
286
+ "290":{"name":"六盘水市","parent":"25"},
287
+ "291":{"name":"遵义市","parent":"25"},
288
+ "292":{"name":"安顺市","parent":"25"},
289
+ "293":{"name":"铜仁市","parent":"25"},
290
+ "294":{"name":"黔西南布依族苗族自治州","parent":"25"},
291
+ "295":{"name":"毕节市","parent":"25"},
292
+ "296":{"name":"黔东南苗族侗族自治州","parent":"25"},
293
+ "297":{"name":"黔南布依族苗族自治州","parent":"25"},
294
+ "298":{"name":"昆明市","parent":"26"},
295
+ "299":{"name":"曲靖市","parent":"26"},
296
+ "300":{"name":"玉溪市","parent":"26"},
297
+ "301":{"name":"保山市","parent":"26"},
298
+ "302":{"name":"昭通市","parent":"26"},
299
+ "303":{"name":"丽江市","parent":"26"},
300
+ "304":{"name":"普洱市","parent":"26"},
301
+ "305":{"name":"临沧市","parent":"26"},
302
+ "306":{"name":"楚雄彝族自治州","parent":"26"},
303
+ "307":{"name":"红河哈尼族彝族自治州","parent":"26"},
304
+ "308":{"name":"文山壮族苗族自治州","parent":"26"},
305
+ "309":{"name":"西双版纳傣族自治州","parent":"26"},
306
+ "310":{"name":"大理白族自治州","parent":"26"},
307
+ "311":{"name":"德宏傣族景颇族自治州","parent":"26"},
308
+ "312":{"name":"怒江傈僳族自治州","parent":"26"},
309
+ "313":{"name":"迪庆藏族自治州","parent":"26"},
310
+ "314":{"name":"拉萨市","parent":"27"},
311
+ "315":{"name":"昌都市","parent":"27"},
312
+ "316":{"name":"山南市","parent":"27"},
313
+ "317":{"name":"日喀则市","parent":"27"},
314
+ "318":{"name":"那曲市","parent":"27"},
315
+ "319":{"name":"阿里地区","parent":"27"},
316
+ "320":{"name":"林芝市","parent":"27"},
317
+ "321":{"name":"西安市","parent":"28"},
318
+ "322":{"name":"铜川市","parent":"28"},
319
+ "323":{"name":"宝鸡市","parent":"28"},
320
+ "324":{"name":"咸阳市","parent":"28"},
321
+ "325":{"name":"渭南市","parent":"28"},
322
+ "326":{"name":"延安市","parent":"28"},
323
+ "327":{"name":"汉中市","parent":"28"},
324
+ "328":{"name":"榆林市","parent":"28"},
325
+ "329":{"name":"安康市","parent":"28"},
326
+ "330":{"name":"商洛市","parent":"28"},
327
+ "331":{"name":"兰州市","parent":"29"},
328
+ "332":{"name":"嘉峪关市","parent":"29"},
329
+ "333":{"name":"金昌市","parent":"29"},
330
+ "334":{"name":"白银市","parent":"29"},
331
+ "335":{"name":"天水市","parent":"29"},
332
+ "336":{"name":"武威市","parent":"29"},
333
+ "337":{"name":"张掖市","parent":"29"},
334
+ "338":{"name":"平凉市","parent":"29"},
335
+ "339":{"name":"酒泉市","parent":"29"},
336
+ "340":{"name":"庆阳市","parent":"29"},
337
+ "341":{"name":"定西市","parent":"29"},
338
+ "342":{"name":"陇南市","parent":"29"},
339
+ "343":{"name":"临夏回族自治州","parent":"29"},
340
+ "344":{"name":"甘南藏族自治州","parent":"29"},
341
+ "345":{"name":"西宁市","parent":"30"},
342
+ "346":{"name":"海东市","parent":"30"},
343
+ "347":{"name":"海北藏族自治州","parent":"30"},
344
+ "348":{"name":"黄南藏族自治州","parent":"30"},
345
+ "349":{"name":"海南藏族自治州","parent":"30"},
346
+ "350":{"name":"果洛藏族自治州","parent":"30"},
347
+ "351":{"name":"玉树藏族自治州","parent":"30"},
348
+ "352":{"name":"海西蒙古族藏族自治州","parent":"30"},
349
+ "353":{"name":"银川市","parent":"31"},
350
+ "354":{"name":"石嘴山市","parent":"31"},
351
+ "355":{"name":"吴忠市","parent":"31"},
352
+ "356":{"name":"固原市","parent":"31"},
353
+ "357":{"name":"中卫市","parent":"31"},
354
+ "358":{"name":"乌鲁木齐市","parent":"32"},
355
+ "359":{"name":"克拉玛依市","parent":"32"},
356
+ "360":{"name":"吐鲁番市","parent":"32"},
357
+ "361":{"name":"哈密市","parent":"32"},
358
+ "362":{"name":"昌吉回族自治州","parent":"32"},
359
+ "363":{"name":"博尔塔拉蒙古自治州","parent":"32"},
360
+ "364":{"name":"巴音郭楞蒙古自治州","parent":"32"},
361
+ "365":{"name":"阿克苏地区","parent":"32"},
362
+ "366":{"name":"克孜勒苏柯尔克孜自治州","parent":"32"},
363
+ "367":{"name":"喀什地区","parent":"32"},
364
+ "368":{"name":"和田地区","parent":"32"},
365
+ "369":{"name":"伊犁哈萨克自治州","parent":"32"},
366
+ "370":{"name":"塔城地区","parent":"32"},
367
+ "371":{"name":"阿勒泰地区","parent":"32"},
368
+ "372":{"name":"新疆省直辖行政单位","parent":"32"},
369
+ "373":{"name":"可克达拉市","parent":"32"},
370
+ "374":{"name":"昆玉市","parent":"32"},
371
+ "375":{"name":"胡杨河市","parent":"32"},
372
+ "376":{"name":"双河市","parent":"32"},
373
+ "3560":{"name":"北票市","parent":"7"},
374
+ "3615":{"name":"高州市","parent":"20"},
375
+ "3651":{"name":"济源市","parent":"17"},
376
+ "3662":{"name":"胶南市","parent":"16"},
377
+ "3683":{"name":"老河口市","parent":"18"},
378
+ "3758":{"name":"沙河市","parent":"4"},
379
+ "3822":{"name":"宜城市","parent":"18"},
380
+ "3842":{"name":"枣阳市","parent":"18"},
381
+ "3850":{"name":"肇东市","parent":"9"},
382
+ "3905":{"name":"澳门","parent":"1"},
383
+ "3906":{"name":"澳门","parent":"3905"},
384
+ "3907":{"name":"香港","parent":"1"},
385
+ "3908":{"name":"香港","parent":"3907"},
386
+ "3947":{"name":"仙桃市","parent":"18"},
387
+ "3954":{"name":"台湾","parent":"1"},
388
+ "3955":{"name":"台湾","parent":"3954"},
389
+ "3956":{"name":"海外","parent":"1"},
390
+ "3957":{"name":"海外","parent":"3956"},
391
+ "3958":{"name":"美国","parent":"3956"},
392
+ "3959":{"name":"加拿大","parent":"3956"},
393
+ "3961":{"name":"日本","parent":"3956"},
394
+ "3962":{"name":"韩国","parent":"3956"},
395
+ "3963":{"name":"德国","parent":"3956"},
396
+ "3964":{"name":"英国","parent":"3956"},
397
+ "3965":{"name":"意大利","parent":"3956"},
398
+ "3966":{"name":"西班牙","parent":"3956"},
399
+ "3967":{"name":"法国","parent":"3956"},
400
+ "3968":{"name":"澳大利亚","parent":"3956"},
401
+ "3969":{"name":"东城区","parent":"2"},
402
+ "3970":{"name":"西城区","parent":"2"},
403
+ "3971":{"name":"崇文区","parent":"2"},
404
+ "3972":{"name":"宣武区","parent":"2"},
405
+ "3973":{"name":"朝阳区","parent":"2"},
406
+ "3974":{"name":"海淀区","parent":"2"},
407
+ "3975":{"name":"丰台区","parent":"2"},
408
+ "3976":{"name":"石景山区","parent":"2"},
409
+ "3977":{"name":"门头沟区","parent":"2"},
410
+ "3978":{"name":"房山区","parent":"2"},
411
+ "3979":{"name":"通州区","parent":"2"},
412
+ "3980":{"name":"顺义区","parent":"2"},
413
+ "3981":{"name":"昌平区","parent":"2"},
414
+ "3982":{"name":"大兴区","parent":"2"},
415
+ "3983":{"name":"平谷区","parent":"2"},
416
+ "3984":{"name":"怀柔区","parent":"2"},
417
+ "3985":{"name":"密云区","parent":"2"},
418
+ "3986":{"name":"延庆区","parent":"2"},
419
+ "3987":{"name":"黄浦区","parent":"10"},
420
+ "3988":{"name":"徐汇区","parent":"10"},
421
+ "3989":{"name":"长宁区","parent":"10"},
422
+ "3990":{"name":"静安区","parent":"10"},
423
+ "3991":{"name":"普陀区","parent":"10"},
424
+ "3992":{"name":"闸北区","parent":"10"},
425
+ "3993":{"name":"虹口区","parent":"10"},
426
+ "3994":{"name":"杨浦区","parent":"10"},
427
+ "3995":{"name":"宝山区","parent":"10"},
428
+ "3996":{"name":"闵行区","parent":"10"},
429
+ "3997":{"name":"嘉定区","parent":"10"},
430
+ "3998":{"name":"浦东新区","parent":"10"},
431
+ "3999":{"name":"松江区","parent":"10"},
432
+ "4000":{"name":"金山区","parent":"10"},
433
+ "4001":{"name":"青浦区","parent":"10"},
434
+ "4002":{"name":"奉贤区","parent":"10"},
435
+ "4003":{"name":"崇明区","parent":"10"},
436
+ "4004":{"name":"和平区","parent":"3"},
437
+ "4005":{"name":"河东区","parent":"3"},
438
+ "4006":{"name":"河西区","parent":"3"},
439
+ "4007":{"name":"南开区","parent":"3"},
440
+ "4008":{"name":"红桥区","parent":"3"},
441
+ "4009":{"name":"河北区","parent":"3"},
442
+ "4010":{"name":"滨海新区","parent":"3"},
443
+ "4011":{"name":"东丽区","parent":"3"},
444
+ "4012":{"name":"西青区","parent":"3"},
445
+ "4013":{"name":"北辰区","parent":"3"},
446
+ "4014":{"name":"津南区","parent":"3"},
447
+ "4015":{"name":"武清区","parent":"3"},
448
+ "4016":{"name":"宝坻区","parent":"3"},
449
+ "4017":{"name":"静海区","parent":"3"},
450
+ "4018":{"name":"宁河区","parent":"3"},
451
+ "4019":{"name":"蓟州区","parent":"3"},
452
+ "4020":{"name":"渝中区","parent":"23"},
453
+ "4021":{"name":"江北区","parent":"23"},
454
+ "4022":{"name":"南岸区","parent":"23"},
455
+ "4023":{"name":"沙坪坝区","parent":"23"},
456
+ "4024":{"name":"九龙坡区","parent":"23"},
457
+ "4025":{"name":"大渡口区","parent":"23"},
458
+ "4026":{"name":"渝北区","parent":"23"},
459
+ "4027":{"name":"巴南区","parent":"23"},
460
+ "4028":{"name":"北碚区","parent":"23"},
461
+ "4029":{"name":"万州区","parent":"23"},
462
+ "4030":{"name":"黔江区","parent":"23"},
463
+ "4031":{"name":"永川区","parent":"23"},
464
+ "4032":{"name":"涪陵区","parent":"23"},
465
+ "4033":{"name":"江津区","parent":"23"},
466
+ "4034":{"name":"合川区","parent":"23"},
467
+ "4035":{"name":"双桥区","parent":"23"},
468
+ "4036":{"name":"万盛区","parent":"23"},
469
+ "4037":{"name":"荣昌区","parent":"23"},
470
+ "4038":{"name":"大足区","parent":"23"},
471
+ "4039":{"name":"璧山区","parent":"23"},
472
+ "4040":{"name":"铜梁区","parent":"23"},
473
+ "4041":{"name":"潼南区","parent":"23"},
474
+ "4042":{"name":"綦江区","parent":"23"},
475
+ "4043":{"name":"忠县","parent":"23"},
476
+ "4044":{"name":"开州区","parent":"23"},
477
+ "4045":{"name":"云阳县","parent":"23"},
478
+ "4046":{"name":"梁平区","parent":"23"},
479
+ "4047":{"name":"垫江县","parent":"23"},
480
+ "4048":{"name":"丰都县","parent":"23"},
481
+ "4049":{"name":"奉节县","parent":"23"},
482
+ "4050":{"name":"巫山县","parent":"23"},
483
+ "4051":{"name":"巫溪县","parent":"23"},
484
+ "4052":{"name":"城口县","parent":"23"},
485
+ "4053":{"name":"武隆区","parent":"23"},
486
+ "4054":{"name":"石柱土家族自治县","parent":"23"},
487
+ "4055":{"name":"秀山土家族苗族自治县","parent":"23"},
488
+ "4056":{"name":"酉阳土家族苗族自治县","parent":"23"},
489
+ "4057":{"name":"彭水苗族土家族自治县","parent":"23"},
490
+ "4058":{"name":"潜江市","parent":"18"},
491
+ "4059":{"name":"三沙市","parent":"22"},
492
+ "4060":{"name":"石河子市","parent":"32"},
493
+ "4061":{"name":"阿拉尔市","parent":"32"},
494
+ "4062":{"name":"图木舒克市","parent":"32"},
495
+ "4063":{"name":"五家渠市","parent":"32"},
496
+ "4064":{"name":"北屯市","parent":"32"},
497
+ "4065":{"name":"铁门关市","parent":"32"},
498
+ "4066":{"name":"儋州市","parent":"22"},
499
+ "4067":{"name":"五指山市","parent":"22"},
500
+ "4068":{"name":"文昌市","parent":"22"},
501
+ "4069":{"name":"琼海市","parent":"22"},
502
+ "4070":{"name":"万宁市","parent":"22"},
503
+ "4072":{"name":"定安县","parent":"22"},
504
+ "4073":{"name":"屯昌县","parent":"22"},
505
+ "4074":{"name":"澄迈县","parent":"22"},
506
+ "4075":{"name":"临高县","parent":"22"},
507
+ "4076":{"name":"琼中黎族苗族自治县","parent":"22"},
508
+ "4077":{"name":"保亭黎族苗族自治县","parent":"22"},
509
+ "4078":{"name":"白沙黎族自治县","parent":"22"},
510
+ "4079":{"name":"昌江黎族自治县","parent":"22"},
511
+ "4080":{"name":"乐东黎族自治县","parent":"22"},
512
+ "4081":{"name":"陵水黎族自治县","parent":"22"},
513
+ "4082":{"name":"马来西亚","parent":"3956"},
514
+ "6047":{"name":"长寿区","parent":"23"},
515
+ "6857":{"name":"阿富汗","parent":"3956"},
516
+ "6858":{"name":"阿尔巴尼亚","parent":"3956"},
517
+ "6859":{"name":"阿尔及利亚","parent":"3956"},
518
+ "6860":{"name":"美属萨摩亚","parent":"3956"},
519
+ "6861":{"name":"安道尔","parent":"3956"},
520
+ "6862":{"name":"安哥拉","parent":"3956"},
521
+ "6863":{"name":"安圭拉","parent":"3956"},
522
+ "6864":{"name":"南极洲","parent":"3956"},
523
+ "6865":{"name":"安提瓜和巴布达","parent":"3956"},
524
+ "6866":{"name":"阿根廷","parent":"3956"},
525
+ "6867":{"name":"亚美尼亚","parent":"3956"},
526
+ "6869":{"name":"奥地利","parent":"3956"},
527
+ "6870":{"name":"阿塞拜疆","parent":"3956"},
528
+ "6871":{"name":"巴哈马","parent":"3956"},
529
+ "6872":{"name":"巴林","parent":"3956"},
530
+ "6873":{"name":"孟加拉国","parent":"3956"},
531
+ "6874":{"name":"巴巴多斯","parent":"3956"},
532
+ "6875":{"name":"白俄罗斯","parent":"3956"},
533
+ "6876":{"name":"比利时","parent":"3956"},
534
+ "6877":{"name":"伯利兹","parent":"3956"},
535
+ "6878":{"name":"贝宁","parent":"3956"},
536
+ "6879":{"name":"百慕大","parent":"3956"},
537
+ "6880":{"name":"不丹","parent":"3956"},
538
+ "6881":{"name":"玻利维亚","parent":"3956"},
539
+ "6882":{"name":"波黑","parent":"3956"},
540
+ "6883":{"name":"博茨瓦纳","parent":"3956"},
541
+ "6884":{"name":"布维岛","parent":"3956"},
542
+ "6885":{"name":"巴西","parent":"3956"},
543
+ "6886":{"name":"英属印度洋领土","parent":"3956"},
544
+ "6887":{"name":"文莱","parent":"3956"},
545
+ "6888":{"name":"保加利亚","parent":"3956"},
546
+ "6889":{"name":"布基纳法索","parent":"3956"},
547
+ "6890":{"name":"布隆迪","parent":"3956"},
548
+ "6891":{"name":"柬埔寨","parent":"3956"},
549
+ "6892":{"name":"喀麦隆","parent":"3956"},
550
+ "6893":{"name":"佛得角","parent":"3956"},
551
+ "6894":{"name":"开曼群岛","parent":"3956"},
552
+ "6895":{"name":"中非","parent":"3956"},
553
+ "6896":{"name":"乍得","parent":"3956"},
554
+ "6897":{"name":"智利","parent":"3956"},
555
+ "6898":{"name":"圣诞岛","parent":"3956"},
556
+ "6899":{"name":"科科斯(基林)群岛","parent":"3956"},
557
+ "6900":{"name":"哥伦比亚","parent":"3956"},
558
+ "6901":{"name":"科摩罗","parent":"3956"},
559
+ "6902":{"name":"刚果(布)","parent":"3956"},
560
+ "6903":{"name":"刚果(金)","parent":"3956"},
561
+ "6904":{"name":"库克群岛","parent":"3956"},
562
+ "6905":{"name":"哥斯达黎加","parent":"3956"},
563
+ "6906":{"name":"科特迪瓦","parent":"3956"},
564
+ "6907":{"name":"克罗地亚","parent":"3956"},
565
+ "6908":{"name":"古巴","parent":"3956"},
566
+ "6909":{"name":"塞浦路斯","parent":"3956"},
567
+ "6910":{"name":"捷克","parent":"3956"},
568
+ "6911":{"name":"丹麦","parent":"3956"},
569
+ "6912":{"name":"吉布提","parent":"3956"},
570
+ "6913":{"name":"多米尼克","parent":"3956"},
571
+ "6914":{"name":"多米尼加共和国","parent":"3956"},
572
+ "6915":{"name":"东帝汶","parent":"3956"},
573
+ "6916":{"name":"厄瓜多尔","parent":"3956"},
574
+ "6917":{"name":"埃及","parent":"3956"},
575
+ "6918":{"name":"萨尔瓦多","parent":"3956"},
576
+ "6919":{"name":"赤道几内亚","parent":"3956"},
577
+ "6920":{"name":"厄立特里亚","parent":"3956"},
578
+ "6921":{"name":"爱沙尼亚","parent":"3956"},
579
+ "6922":{"name":"埃塞俄比亚","parent":"3956"},
580
+ "6923":{"name":"福克兰群岛(马尔维纳斯)","parent":"3956"},
581
+ "6924":{"name":"法罗群岛","parent":"3956"},
582
+ "6925":{"name":"斐济","parent":"3956"},
583
+ "6926":{"name":"芬兰","parent":"3956"},
584
+ "6927":{"name":"法属圭亚那","parent":"3956"},
585
+ "6928":{"name":"法属波利尼西亚","parent":"3956"},
586
+ "6929":{"name":"法属南部领土","parent":"3956"},
587
+ "6930":{"name":"加蓬","parent":"3956"},
588
+ "6931":{"name":"冈比亚","parent":"3956"},
589
+ "6932":{"name":"格鲁吉亚","parent":"3956"},
590
+ "6933":{"name":"加纳","parent":"3956"},
591
+ "6934":{"name":"直布罗陀","parent":"3956"},
592
+ "6935":{"name":"希腊","parent":"3956"},
593
+ "6936":{"name":"格陵兰","parent":"3956"},
594
+ "6937":{"name":"格林纳达","parent":"3956"},
595
+ "6938":{"name":"瓜德罗普","parent":"3956"},
596
+ "6939":{"name":"关岛","parent":"3956"},
597
+ "6940":{"name":"危地马拉","parent":"3956"},
598
+ "6941":{"name":"几内亚","parent":"3956"},
599
+ "6942":{"name":"几内亚比绍","parent":"3956"},
600
+ "6943":{"name":"圭亚那","parent":"3956"},
601
+ "6944":{"name":"海地","parent":"3956"},
602
+ "6945":{"name":"赫德岛和麦克唐纳岛","parent":"3956"},
603
+ "6946":{"name":"洪都拉斯","parent":"3956"},
604
+ "6947":{"name":"匈牙利","parent":"3956"},
605
+ "6948":{"name":"冰岛","parent":"3956"},
606
+ "6949":{"name":"印度","parent":"3956"},
607
+ "6950":{"name":"印度尼西亚","parent":"3956"},
608
+ "6951":{"name":"伊朗","parent":"3956"},
609
+ "6952":{"name":"伊拉克","parent":"3956"},
610
+ "6953":{"name":"爱尔兰","parent":"3956"},
611
+ "6954":{"name":"以色列","parent":"3956"},
612
+ "6955":{"name":"牙买加","parent":"3956"},
613
+ "6956":{"name":"约旦","parent":"3956"},
614
+ "6957":{"name":"哈萨克斯坦","parent":"3956"},
615
+ "6958":{"name":"肯尼亚","parent":"3956"},
616
+ "6959":{"name":"基里巴斯","parent":"3956"},
617
+ "6960":{"name":"朝鲜","parent":"3956"},
618
+ "6961":{"name":"科威特","parent":"3956"},
619
+ "6962":{"name":"吉尔吉斯斯坦","parent":"3956"},
620
+ "6963":{"name":"老挝","parent":"3956"},
621
+ "6964":{"name":"拉脱维亚","parent":"3956"},
622
+ "6965":{"name":"黎巴嫩","parent":"3956"},
623
+ "6966":{"name":"莱索托","parent":"3956"},
624
+ "6967":{"name":"利比里亚","parent":"3956"},
625
+ "6968":{"name":"利比亚","parent":"3956"},
626
+ "6969":{"name":"列支敦士登","parent":"3956"},
627
+ "6970":{"name":"立陶宛","parent":"3956"},
628
+ "6971":{"name":"卢森堡","parent":"3956"},
629
+ "6972":{"name":"前南马其顿","parent":"3956"},
630
+ "6973":{"name":"马达加斯加","parent":"3956"},
631
+ "6974":{"name":"马拉维","parent":"3956"},
632
+ "6975":{"name":"马尔代夫","parent":"3956"},
633
+ "6976":{"name":"马里","parent":"3956"},
634
+ "6977":{"name":"马耳他","parent":"3956"},
635
+ "6978":{"name":"马绍尔群岛","parent":"3956"},
636
+ "6979":{"name":"马提尼克","parent":"3956"},
637
+ "6980":{"name":"毛里塔尼亚","parent":"3956"},
638
+ "6981":{"name":"毛里求斯","parent":"3956"},
639
+ "6982":{"name":"马约特","parent":"3956"},
640
+ "6983":{"name":"墨西哥","parent":"3956"},
641
+ "6984":{"name":"密克罗尼西亚联邦","parent":"3956"},
642
+ "6985":{"name":"摩尔多瓦","parent":"3956"},
643
+ "6986":{"name":"摩纳哥","parent":"3956"},
644
+ "6987":{"name":"蒙古","parent":"3956"},
645
+ "6988":{"name":"蒙特塞拉特","parent":"3956"},
646
+ "6989":{"name":"摩洛哥","parent":"3956"},
647
+ "6990":{"name":"莫桑比克","parent":"3956"},
648
+ "6991":{"name":"缅甸","parent":"3956"},
649
+ "6992":{"name":"纳米比亚","parent":"3956"},
650
+ "6993":{"name":"瑙鲁","parent":"3956"},
651
+ "6994":{"name":"尼泊尔","parent":"3956"},
652
+ "6995":{"name":"荷兰","parent":"3956"},
653
+ "6996":{"name":"荷属安的列斯","parent":"3956"},
654
+ "6997":{"name":"新喀里多尼亚","parent":"3956"},
655
+ "6998":{"name":"新西兰","parent":"3956"},
656
+ "6999":{"name":"尼加拉瓜","parent":"3956"},
657
+ "7000":{"name":"尼日尔","parent":"3956"},
658
+ "7001":{"name":"尼日利亚","parent":"3956"},
659
+ "7002":{"name":"纽埃","parent":"3956"},
660
+ "7003":{"name":"诺福克岛","parent":"3956"},
661
+ "7004":{"name":"北马里亚纳","parent":"3956"},
662
+ "7005":{"name":"挪威","parent":"3956"},
663
+ "7006":{"name":"阿曼","parent":"3956"},
664
+ "7007":{"name":"巴基斯坦","parent":"3956"},
665
+ "7008":{"name":"帕劳","parent":"3956"},
666
+ "7009":{"name":"巴勒斯坦","parent":"3956"},
667
+ "7010":{"name":"巴拿马","parent":"3956"},
668
+ "7011":{"name":"巴布亚新几内亚","parent":"3956"},
669
+ "7012":{"name":"巴拉圭","parent":"3956"},
670
+ "7013":{"name":"秘鲁","parent":"3956"},
671
+ "7014":{"name":"菲律宾","parent":"3956"},
672
+ "7015":{"name":"皮特凯恩群岛","parent":"3956"},
673
+ "7016":{"name":"波兰","parent":"3956"},
674
+ "7017":{"name":"葡萄牙","parent":"3956"},
675
+ "7018":{"name":"波多黎各","parent":"3956"},
676
+ "7019":{"name":"卡塔尔","parent":"3956"},
677
+ "7020":{"name":"留尼汪","parent":"3956"},
678
+ "7021":{"name":"罗马尼亚","parent":"3956"},
679
+ "7022":{"name":"俄罗斯联邦","parent":"3956"},
680
+ "7023":{"name":"卢旺达","parent":"3956"},
681
+ "7024":{"name":"圣赫勒拿","parent":"3956"},
682
+ "7025":{"name":"圣基茨和尼维斯","parent":"3956"},
683
+ "7026":{"name":"圣卢西亚","parent":"3956"},
684
+ "7027":{"name":"圣皮埃尔和密克隆","parent":"3956"},
685
+ "7028":{"name":"圣文森特和格林纳丁斯","parent":"3956"},
686
+ "7029":{"name":"萨摩亚","parent":"3956"},
687
+ "7030":{"name":"圣马力诺","parent":"3956"},
688
+ "7031":{"name":"圣多美和普林西比","parent":"3956"},
689
+ "7032":{"name":"沙特阿拉伯","parent":"3956"},
690
+ "7033":{"name":"塞内加尔","parent":"3956"},
691
+ "7034":{"name":"塞舌尔","parent":"3956"},
692
+ "7035":{"name":"塞拉利昂","parent":"3956"},
693
+ "7036":{"name":"新加坡","parent":"3956"},
694
+ "7037":{"name":"斯洛伐克","parent":"3956"},
695
+ "7038":{"name":"斯洛文尼亚","parent":"3956"},
696
+ "7039":{"name":"所罗门群岛","parent":"3956"},
697
+ "7040":{"name":"索马里","parent":"3956"},
698
+ "7041":{"name":"南非","parent":"3956"},
699
+ "7042":{"name":"南乔治亚岛和南桑德韦奇岛","parent":"3956"},
700
+ "7043":{"name":"斯里兰卡","parent":"3956"},
701
+ "7044":{"name":"苏丹","parent":"3956"},
702
+ "7045":{"name":"苏里南","parent":"3956"},
703
+ "7046":{"name":"斯瓦尔巴群岛","parent":"3956"},
704
+ "7047":{"name":"斯威士兰","parent":"3956"},
705
+ "7048":{"name":"瑞典","parent":"3956"},
706
+ "7049":{"name":"瑞士","parent":"3956"},
707
+ "7050":{"name":"叙利亚","parent":"3956"},
708
+ "7051":{"name":"塔吉克斯坦","parent":"3956"},
709
+ "7052":{"name":"坦桑尼亚","parent":"3956"},
710
+ "7053":{"name":"泰国","parent":"3956"},
711
+ "7054":{"name":"多哥","parent":"3956"},
712
+ "7055":{"name":"托克劳","parent":"3956"},
713
+ "7056":{"name":"汤加","parent":"3956"},
714
+ "7057":{"name":"特立尼达和多巴哥","parent":"3956"},
715
+ "7058":{"name":"突尼斯","parent":"3956"},
716
+ "7059":{"name":"土耳其","parent":"3956"},
717
+ "7060":{"name":"土库曼斯坦","parent":"3956"},
718
+ "7061":{"name":"特克斯科斯群岛","parent":"3956"},
719
+ "7062":{"name":"图瓦卢","parent":"3956"},
720
+ "7063":{"name":"乌干达","parent":"3956"},
721
+ "7064":{"name":"乌克兰","parent":"3956"},
722
+ "7065":{"name":"阿联酋","parent":"3956"},
723
+ "7066":{"name":"美国本土外小岛屿","parent":"3956"},
724
+ "7067":{"name":"乌拉圭","parent":"3956"},
725
+ "7068":{"name":"乌兹别克斯坦","parent":"3956"},
726
+ "7069":{"name":"瓦努阿图","parent":"3956"},
727
+ "7070":{"name":"梵蒂冈","parent":"3956"},
728
+ "7071":{"name":"委内瑞拉","parent":"3956"},
729
+ "7072":{"name":"越南","parent":"3956"},
730
+ "7073":{"name":"英属维尔京群岛","parent":"3956"},
731
+ "7074":{"name":"美属维尔京群岛","parent":"3956"},
732
+ "7075":{"name":"瓦利斯和富图纳","parent":"3956"},
733
+ "7076":{"name":"西撒哈拉","parent":"3956"},
734
+ "7077":{"name":"也门","parent":"3956"},
735
+ "7078":{"name":"南斯拉夫","parent":"3956"},
736
+ "7079":{"name":"赞比亚","parent":"3956"},
737
+ "7080":{"name":"津巴布韦","parent":"3956"},
738
+ "7081":{"name":"塞尔维亚","parent":"3956"},
739
+ "7082":{"name":"雄安新区","parent":"4"},
740
+ "7084":{"name":"天门市","parent":"18"}
741
+ }
742
+
743
# Every region name that appears in TBL, collected once for O(1) membership tests.
NM_SET = {region["name"] for region in TBL.values()}
744
+
745
def get_names(id):
    """Resolve a region id to its name followed by its ancestors' names.

    Behaviour by input:

    * A falsy ``id`` (``None``, ``""``, ``0``) or the text ``"none"`` in any
      letter case returns ``[]``.
    * A value whose string form is not purely ASCII digits (ignoring
      surrounding whitespace) is assumed to already be a name and is
      returned unchanged as a one-element list.
    * A numeric id is looked up in ``TBL``; the result is the entry's
      ``"name"`` followed by the names obtained by resolving its
      ``"parent"`` the same way.  An id that is not in ``TBL`` returns
      ``[]``, which is also what ends the walk up the hierarchy.

    Surrounding whitespace on a numeric id is ignored for the lookup as
    well as for the validation, so ``" 33 "`` resolves like ``"33"``.
    """
    if not id or str(id).lower() == "none":
        return []
    id = str(id)
    # Validate and look up with the same stripped key; previously the
    # unstripped string was used for the lookup, so a padded numeric id
    # passed validation but could never be found in TBL.
    key = id.strip()
    if not re.match(r"[0-9]+$", key):
        return [id]
    entry = TBL.get(key)
    if not entry:
        return []
    return [entry["name"], *get_names(entry["parent"])]
756
+
757
+ import re
758
def isName(nm):
    """Tell whether ``nm`` matches a region name in ``NM_SET``.

    ``nm`` is accepted when any of these spellings is in ``NM_SET``:
    ``nm`` itself, ``nm`` with "市" appended, or ``nm`` with a trailing
    "省" or "自治区" (optionally preceded by 回族/壮族/维吾尔) removed.
    Returns ``True`` on the first match and ``False`` otherwise.
    """
    if nm in NM_SET or nm + "市" in NM_SET:
        return True
    # Drop a province / autonomous-region suffix and retry with the bare name.
    bare = re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm)
    return bare in NM_SET
deepdoc/parser/resume/entities/res/corp.tks.freq.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "科技",
3
+ "集团",
4
+ "网络科技",
5
+ "技术",
6
+ "信息",
7
+ "分公司",
8
+ "信息技术",
9
+ "发展",
10
+ "科技股份",
11
+ "网络",
12
+ "贸易",
13
+ "商贸",
14
+ "工程",
15
+ "企业",
16
+ "集团股份",
17
+ "商务",
18
+ "工业",
19
+ "控股集团",
20
+ "国际贸易",
21
+ "软件技术",
22
+ "数码科技",
23
+ "软件开发",
24
+ "有限",
25
+ "经营",
26
+ "科技开发",
27
+ "股份公司",
28
+ "电子技术",
29
+ "实业集团",
30
+ "责任",
31
+ "无限",
32
+ "工程技术",
33
+ "上市公司",
34
+ "技术开发",
35
+ "软件系统",
36
+ "总公司",
37
+ "网络服务",
38
+ "ltd.",
39
+ "technology",
40
+ "company",
41
+ "服务公司",
42
+ "计算机技术",
43
+ "计算机软件",
44
+ "电子信息",
45
+ "corporation",
46
+ "计算机服务",
47
+ "计算机系统",
48
+ "有限公司",
49
+ "事业部",
50
+ "公司",
51
+ "股份",
52
+ "有限责任",
53
+ "软件",
54
+ "控股",
55
+ "高科技",
56
+ "房地产",
57
+ "事业群",
58
+ "部门",
59
+ "电子商务",
60
+ "人力资源顾问",
61
+ "人力资源",
62
+ "株式会社",
63
+ "网络营销"
64
+ ]
65
+
deepdoc/parser/resume/entities/res/corp_baike_len.csv ADDED
The diff for this file is too large to render. See raw diff
 
deepdoc/parser/resume/entities/res/corp_tag.json ADDED
The diff for this file is too large to render. See raw diff
 
deepdoc/parser/resume/entities/res/good_corp.json ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "google assistant investments",
3
+ "amazon",
4
+ "dingtalk china information",
5
+ "zhejiang alibaba communication",
6
+ "yunos",
7
+ "腾讯云",
8
+ "新浪新闻",
9
+ "网邻通",
10
+ "蚂蚁集团",
11
+ "大疆",
12
+ "恒生股份",
13
+ "sf express",
14
+ "智者天下",
15
+ "shanghai hema network",
16
+ "papayamobile",
17
+ "lexinfintech",
18
+ "industrial consumer finance",
19
+ "360搜索",
20
+ "世纪光速",
21
+ "迅雷区块链",
22
+ "赛盒科技",
23
+ "齐力电子商务",
24
+ "平安养老险",
25
+ "平安证券",
26
+ "平安好贷",
27
+ "五八新服",
28
+ "呯嘭智能",
29
+ "阿里妈妈",
30
+ "mdt",
31
+ "tencent",
32
+ "weibo",
33
+ "浪潮软件",
34
+ "阿里巴巴广告",
35
+ "mashang consumer finance",
36
+ "维沃",
37
+ "hqg , limited",
38
+ "moodys",
39
+ "搜狐支付",
40
+ "百度秀",
41
+ "新浪服务",
42
+ "零售通",
43
+ "同城艺龙",
44
+ "虾米音乐",
45
+ "贝壳集团",
46
+ "小米有品",
47
+ "滴滴自动驾驶",
48
+ "图记",
49
+ "阿里影业",
50
+ "卓联软件",
51
+ "zhejiang tmall",
52
+ "谷歌中国",
53
+ "hithink flush",
54
+ "时装科技",
55
+ "程会玩国际旅行社",
56
+ "amazon china holding limited",
57
+ "中信消金",
58
+ "当当比特物流",
59
+ "新浪新媒体咨询",
60
+ "tongcheng network",
61
+ "金山在线",
62
+ "shopping cart",
63
+ "犀互动",
64
+ "五八",
65
+ "bilibili",
66
+ "阿里星球",
67
+ "滴滴金科服务",
68
+ "美团",
69
+ "哈啰出行",
70
+ "face",
71
+ "平安健康",
72
+ "招商银行",
73
+ "连亚",
74
+ "盒马网络",
75
+ "b站",
76
+ "华为机器",
77
+ "shanghai mdt infotech",
78
+ "ping an healthkonnect",
79
+ "beijing home link real estate broker",
80
+ "花海仓",
81
+ "beijing jingdong shangke information",
82
+ "微影智能",
83
+ "酷狗游戏",
84
+ "health.pingan.com",
85
+ "众安",
86
+ "陌陌",
87
+ "海康威视数字",
88
+ "同程网",
89
+ "艾丁金融",
90
+ "知乎",
91
+ " lu",
92
+ "国际商业机器公司",
93
+ "捷信消费金融",
94
+ "恒生利融",
95
+ "china merchants bank",
96
+ "企鹅电竞",
97
+ "捷信信驰",
98
+ "360智能家居",
99
+ "小桔车服",
100
+ "homecredit",
101
+ "皮皮虾",
102
+ "畅游",
103
+ "聚爱聊",
104
+ "suning.com",
105
+ "途牛旅游网",
106
+ "花呗",
107
+ "盈店通",
108
+ "sina",
109
+ "阿里巴巴音乐",
110
+ "华为技术有限公司",
111
+ "国付宝",
112
+ "shanghai lianshang network",
113
+ "oppo",
114
+ "华为投资控股",
115
+ "beijing sohu new media information",
116
+ "times square",
117
+ "菜鸟物流",
118
+ "lingxing",
119
+ "jd digits",
120
+ "同程旅游",
121
+ "分期乐",
122
+ "火锅视频",
123
+ "天天快报",
124
+ "猎豹移动",
125
+ "五八人力资源",
126
+ "宝宝树",
127
+ "顺丰科技",
128
+ "上海西翠",
129
+ "诗程文化传播",
130
+ "dewu",
131
+ "领星网络",
132
+ "aliexpress",
133
+ "贝塔通科技",
134
+ "链家",
135
+ "花小猪",
136
+ "趣输入",
137
+ "搜狐新媒体",
138
+ "一淘",
139
+ "56",
140
+ "qq阅读",
141
+ "青桔单车",
142
+ "iflytek",
143
+ "每日优鲜电子商务",
144
+ "腾讯觅影",
145
+ "微医",
146
+ "松果网",
147
+ "paypal",
148
+ "递瑞供应链管理",
149
+ "领星",
150
+ "qunar",
151
+ "三快",
152
+ "lu.com",
153
+ "携程旅行网",
154
+ "新潮传媒",
155
+ "链家经纪",
156
+ "景域文化",
157
+ "阿里健康",
158
+ "pingpeng",
159
+ "聚划算",
160
+ "零机科技",
161
+ "街兔电单车",
162
+ "快乐购",
163
+ "华为数字能源",
164
+ "搜狐",
165
+ "陆家嘴国际金融资产交易市场",
166
+ "nanjing tuniu",
167
+ "亚马逊",
168
+ "苏宁易购",
169
+ "携程旅游",
170
+ "苏宁金服",
171
+ "babytree",
172
+ "悟空问答",
173
+ "同花顺",
174
+ "eastmoney",
175
+ "浪潮信息",
176
+ "滴滴智慧交通",
177
+ "beijing ruixun lingtong",
178
+ "平安综合金融服务",
179
+ "爱奇艺",
180
+ "小米集团",
181
+ "华为云",
182
+ "微店",
183
+ "恒生集团",
184
+ "网易有道",
185
+ "boccfc",
186
+ "世纪思速科技",
187
+ "海康消防",
188
+ "beijing xiaomi",
189
+ "众安科技",
190
+ "五八同城",
191
+ "霆程汽车租赁",
192
+ "云卖分销",
193
+ "乐信集团",
194
+ "蚂蚁",
195
+ "舶乐蜜电子商务",
196
+ "支付宝中国",
197
+ "砖块消消消",
198
+ "vivo",
199
+ "阿里互娱",
200
+ "中国平安",
201
+ "lingxihudong",
202
+ "百度网盘",
203
+ "1号店",
204
+ "字节跳动",
205
+ "京东科技",
206
+ "驴妈妈兴旅国际旅行社",
207
+ "hangzhou alibaba music",
208
+ "xunlei",
209
+ "灵犀互动娱乐",
210
+ "快手",
211
+ "youtube",
212
+ "连尚慧眼",
213
+ "腾讯体育",
214
+ "爱商在线",
215
+ "酷我音乐",
216
+ "金融壹账通",
217
+ "搜狗服务",
218
+ "banma information",
219
+ "a站",
220
+ "罗汉堂",
221
+ "薇仕网络",
222
+ "搜狐新闻",
223
+ "贝宝",
224
+ "薇仕",
225
+ "口袋时尚科技",
226
+ "穆迪咨询",
227
+ "新狐投资管理",
228
+ "hikvision",
229
+ "alimama china holding limited",
230
+ "超聚变数字",
231
+ "腾讯视频",
232
+ "恒生电子",
233
+ "百度游戏",
234
+ "绿洲",
235
+ "木瓜移动",
236
+ "红袖添香",
237
+ "店匠科技",
238
+ "易贝",
239
+ "一淘网",
240
+ "博览群书",
241
+ "唯品会",
242
+ "lazglobal",
243
+ "amap",
244
+ "芒果网",
245
+ "口碑",
246
+ "海康慧影",
247
+ "腾讯音乐娱乐",
248
+ "网易严选",
249
+ "微信",
250
+ "shenzhen lexin holding",
251
+ "hangzhou pingpeng intelligent",
252
+ "连尚网络",
253
+ "海思",
254
+ "isunor",
255
+ "蝉翼",
256
+ "阿里游戏",
257
+ "广州优视",
258
+ "优视",
259
+ "腾讯征信",
260
+ "识装",
261
+ "finserve.pingan.com",
262
+ "papaya",
263
+ "阅文",
264
+ "平安健康保险",
265
+ "考拉海购",
266
+ "网易印象",
267
+ "wifi万能钥匙",
268
+ "新浪互联服务",
269
+ "亚马逊云科技",
270
+ "迅雷看看",
271
+ "华为朗新科技",
272
+ "adyen hong kong limited",
273
+ "谷歌",
274
+ "得物",
275
+ "网心",
276
+ "cainiao network",
277
+ "沐瞳",
278
+ "linkedln",
279
+ "hundsun",
280
+ "阿里旅行",
281
+ "珍爱网",
282
+ "阿里巴巴通信",
283
+ "金山奇剑",
284
+ "tongtool",
285
+ "华为安捷信电气",
286
+ "快乐时代",
287
+ "平安寿险",
288
+ "微博",
289
+ "微跳蚤",
290
+ "oppo移动通信",
291
+ "毒",
292
+ "alimama",
293
+ "shoplazza",
294
+ "shenzhen dianjiang science and",
295
+ "众鸣世科",
296
+ "平安金融",
297
+ "狐友",
298
+ "维沃移动通信",
299
+ "tobosoft",
300
+ "齐力电商",
301
+ "ali",
302
+ "诚信通",
303
+ "行吟",
304
+ "跳舞的线",
305
+ "橙心优选",
306
+ "众安健康",
307
+ "亚马逊中国投资",
308
+ "德絮投资管理中心合伙",
309
+ "招联消费金融",
310
+ "百度文学",
311
+ "芝麻信用",
312
+ "阿里零售通",
313
+ "时装",
314
+ "花样直播",
315
+ "sogou",
316
+ "uc",
317
+ "海思半导体",
318
+ "zhongan online p&c insurance",
319
+ "新浪数字",
320
+ "驴妈妈旅游网",
321
+ "华为数字能源技术",
322
+ "京东数科",
323
+ "oracle",
324
+ "xiaomi",
325
+ "nyse",
326
+ "阳光消费金融",
327
+ "天天动听",
328
+ "大众点评",
329
+ "上海瑞家",
330
+ "trustpass",
331
+ "hundsun technologies",
332
+ "美团小贷",
333
+ "ebay",
334
+ "通途",
335
+ "tcl",
336
+ "鸿蒙",
337
+ "酷狗计算机",
338
+ "品诺保险",
339
+ "capitalg",
340
+ "康盛创想",
341
+ "58同城",
342
+ "闲鱼",
343
+ "微软",
344
+ "吉易付科技",
345
+ "理财通",
346
+ "ctrip",
347
+ "yy",
348
+ "华为数字",
349
+ "kingsoft",
350
+ "孙宁金融",
351
+ "房江湖经纪",
352
+ "youku",
353
+ "ant financial services group",
354
+ "盒马",
355
+ "sensetime",
356
+ "伊千网络",
357
+ "小豹ai翻译棒",
358
+ "shopify",
359
+ "前海微众银行",
360
+ "qd",
361
+ "gmail",
362
+ "pingpong",
363
+ "alibaba group holding limited",
364
+ "捷信时空电子商务",
365
+ "orientsec",
366
+ "乔戈里管理咨询",
367
+ "ant",
368
+ "锐讯灵通",
369
+ "兴业消费金融",
370
+ "京东叁佰陆拾度电子商务",
371
+ "新浪",
372
+ "优酷土豆",
373
+ "海康机器人",
374
+ "美团单车",
375
+ "海康存储",
376
+ "领英",
377
+ "阿里全球速卖通",
378
+ "美菜网",
379
+ "京邦达",
380
+ "安居客",
381
+ "阿里体育",
382
+ "相互宝",
383
+ "cloudwalk",
384
+ "百度智能云",
385
+ "贝壳",
386
+ "酷狗",
387
+ "sunshine consumer finance",
388
+ "掌宜",
389
+ "奇酷网",
390
+ "核新同花顺",
391
+ "阿里巴巴影业",
392
+ "节创",
393
+ "学而思网校",
394
+ "速途",
395
+ "途牛",
396
+ "阿里云计算",
397
+ "beijing sensetime",
398
+ "alibaba cloud",
399
+ "西瓜视频",
400
+ "美团优选",
401
+ "orient securities limited",
402
+ "华为朗新",
403
+ "店匠",
404
+ "shanghai weishi network",
405
+ "友盟",
406
+ "飞猪旅行",
407
+ "滴滴出行",
408
+ "alipay",
409
+ "mogu",
410
+ "dangdang",
411
+ "大麦网",
412
+ "汉军智能系统",
413
+ "百度地图",
414
+ "货车帮",
415
+ "狐狸金服",
416
+ "众安在线保险经纪",
417
+ "华为通信",
418
+ "新浪支付",
419
+ "zhihu",
420
+ "alibaba cloud computing",
421
+ "沙发视频",
422
+ "金山软件",
423
+ "ping an good doctor",
424
+ "携程",
425
+ "脉脉",
426
+ "youku information beijing",
427
+ "zhongan",
428
+ "艾丁软件",
429
+ "乒乓智能",
430
+ "蘑菇街",
431
+ "taobao",
432
+ "华为技术服务",
433
+ "仕承文化传播",
434
+ "安捷信",
435
+ "狐狸互联网小额贷款",
436
+ "节点迅捷",
437
+ "中国银行",
438
+ "搜镇",
439
+ "众安在线",
440
+ "dingtalk",
441
+ "云从科技",
442
+ "beijing jingbangda trade",
443
+ "moody s",
444
+ "滚动的天空",
445
+ "yl.pingan.com",
446
+ "奇虎",
447
+ "alihealth",
448
+ "芒果tv",
449
+ "lufax",
450
+ "美团打车",
451
+ "小桔",
452
+ "贝壳找房网",
453
+ "小米科技",
454
+ "vips",
455
+ "kindle",
456
+ "亚马逊服务",
457
+ "citic consumer finance",
458
+ "微众",
459
+ "搜狗智慧互联网医院",
460
+ "盒马鲜生",
461
+ "life.pinan.com",
462
+ "ph.com.cn",
463
+ "银联",
464
+ "cmbchina",
465
+ "平安金融科技咨询",
466
+ "微保",
467
+ "甲骨文中国",
468
+ "飞书",
469
+ "koubei shanghai information",
470
+ "企鹅辅导",
471
+ "斑马",
472
+ "平安租赁",
473
+ "云从",
474
+ "马上消费",
475
+ "hangzhou ali baba advertising",
476
+ "金山",
477
+ "赛盒",
478
+ "科大讯飞",
479
+ "金星创业投资",
480
+ "平安国际融资租赁",
481
+ "360你财富",
482
+ "西山居",
483
+ "shenzhen qianhai fourth paradigm data",
484
+ "海思光电子",
485
+ "猎户星空",
486
+ "网易公司",
487
+ "浪潮",
488
+ "粒粒橙传媒",
489
+ "招联金融",
490
+ "100. me",
491
+ "捷信信驰咨询",
492
+ "唯品仓",
493
+ "orient",
494
+ "趣拿",
495
+ "摩拜单车",
496
+ "天猫精灵",
497
+ "菜鸟",
498
+ "豹小贩",
499
+ "去哪儿",
500
+ "米家",
501
+ "哈啰单车",
502
+ "搜狐体育",
503
+ "shopify payments usa",
504
+ "高德软件",
505
+ "讯联智付",
506
+ "乐信",
507
+ "唯你搭",
508
+ "第四范式",
509
+ "菜鸟网络",
510
+ "同程",
511
+ "yy语音",
512
+ "浪潮云",
513
+ "东财",
514
+ "淘宝",
515
+ "寻梦",
516
+ "citic securities limited",
517
+ "青橙之旅",
518
+ "阿里巴巴",
519
+ "番茄小说",
520
+ "上海亿贝",
521
+ "inspur",
522
+ "babytree inc",
523
+ "海康智慧产业股权投资基金合伙合伙",
524
+ "adyen",
525
+ "艺龙",
526
+ "蚂蚁金服",
527
+ "平安金服",
528
+ "百度百科",
529
+ "unionpay",
530
+ "当当",
531
+ "阅文集团",
532
+ "东方财富",
533
+ "东方证券",
534
+ "哈罗单车",
535
+ "优酷",
536
+ "海康",
537
+ "alipay china network",
538
+ "网商银行",
539
+ "钧正",
540
+ "property.pingan.com",
541
+ "豹咖啡",
542
+ "网易",
543
+ "我爱cba",
544
+ "theduapp",
545
+ "360",
546
+ "金山数字娱乐",
547
+ "新浪阅读",
548
+ "alibabagames",
549
+ "顺丰",
550
+ "支点商贸",
551
+ "同程旅行",
552
+ "citic securities",
553
+ "ele.com",
554
+ "tal",
555
+ "fresh hema",
556
+ "运满满",
557
+ "贝壳网",
558
+ "酷狗音乐",
559
+ "鲜城",
560
+ "360健康",
561
+ "浪潮世科",
562
+ "迅雷网络",
563
+ "哔哩哔哩",
564
+ "华为电动",
565
+ "淘友天下",
566
+ "华多网络",
567
+ "xunlei networking technologies",
568
+ "云杉",
569
+ "当当网电子商务",
570
+ "津虹网络",
571
+ "wedoc cloud hangzhou holdings",
572
+ "alisports shanghai",
573
+ "旷视金智",
574
+ "钉钉中国",
575
+ "微影",
576
+ "金山快快",
577
+ "亿贝",
578
+ "wedoc",
579
+ "autonavi",
580
+ "哈啰助力车",
581
+ "google cloud",
582
+ "新浪乐居",
583
+ "京东股票",
584
+ "搜狗智慧远程医疗中心",
585
+ "中银消金",
586
+ "merchants union consumer finance",
587
+ "王者荣耀",
588
+ "百度手机",
589
+ "美团民宿",
590
+ "kaola",
591
+ "小屋",
592
+ "金山网络",
593
+ "来往",
594
+ "顺丰速运",
595
+ "腾讯课堂",
596
+ "百度在线网络",
597
+ "美团买菜",
598
+ "威视汽车",
599
+ "uc mobile",
600
+ "来赞达",
601
+ "平安健康医疗",
602
+ "豹小秘",
603
+ "尚网",
604
+ "哈勃投资",
605
+ " ping an insurance group of china ,",
606
+ "小米",
607
+ "360好药",
608
+ "qq音乐",
609
+ "lingxigames",
610
+ "faceu激萌",
611
+ "搜狗",
612
+ "sohu",
613
+ "满帮",
614
+ "vipshop",
615
+ "wishpost",
616
+ "金山世游",
617
+ "shanghai yibaimi network",
618
+ "1688",
619
+ "海康汽车",
620
+ "顺丰控股",
621
+ "华为",
622
+ "妙镜vr",
623
+ "paybkj.com",
624
+ "hellobike",
625
+ "豹来电",
626
+ "京东",
627
+ "驴妈妈",
628
+ "momo",
629
+ "平安健康险",
630
+ "哈勃科技",
631
+ "美菜",
632
+ "众安在线财产保险",
633
+ "海康威视",
634
+ "east money information",
635
+ "阿里云",
636
+ "蝉游记",
637
+ "余额宝",
638
+ "屋客",
639
+ "滴滴",
640
+ "shopify international limited",
641
+ "百度",
642
+ "阿里健康中国",
643
+ "阿里通信",
644
+ "微梦创科",
645
+ "微医云",
646
+ "轻颜相机",
647
+ "搜易居",
648
+ "趣店集团",
649
+ "美团云",
650
+ "ant group",
651
+ "金山云",
652
+ "beijing express hand",
653
+ "觅觅",
654
+ "支付宝",
655
+ "滴滴承信科技咨询服务",
656
+ "拼多多",
657
+ "众安运动",
658
+ "乞力电商",
659
+ "youcash",
660
+ "唯品金融",
661
+ "陆金所",
662
+ "本地生活",
663
+ "sz dji",
664
+ "海康智能",
665
+ "魔方网聘",
666
+ "青藤大学",
667
+ "international business machines",
668
+ "学而思",
669
+ "beijing zhongming century science and",
670
+ "猎豹清理大师",
671
+ "asinking",
672
+ "高德",
673
+ "苏宁",
674
+ "优酷网",
675
+ "艾丁",
676
+ "中银消费金融",
677
+ "京东健康",
678
+ "五八教育",
679
+ "pingpongx",
680
+ "搜狐时尚",
681
+ "阿里广告",
682
+ "平安财险",
683
+ "中邮消金",
684
+ "etao",
685
+ "怕怕",
686
+ "nyse:cmcm",
687
+ "华为培训中心",
688
+ "高德地图",
689
+ "云狐天下征信",
690
+ "大疆创新",
691
+ "连尚",
692
+ "壹佰米",
693
+ "康健公司",
694
+ "iqiyi.com",
695
+ "360安全云盘",
696
+ "馒头直播",
697
+ "淘友网",
698
+ "东方赢家",
699
+ "bank of china",
700
+ "微众银行",
701
+ "阿里巴巴国际站",
702
+ "虾米",
703
+ "去哪儿网",
704
+ "ctrip travel network shanghai",
705
+ "潇湘书院",
706
+ "腾讯",
707
+ "快乐阳光互动娱乐传媒",
708
+ "迅雷",
709
+ "weidian",
710
+ "滴滴货运",
711
+ "ping an puhui enterprise management",
712
+ "新浪仓石基金销售",
713
+ "搜狐焦点",
714
+ "alibaba pictures",
715
+ "wps",
716
+ "平安",
717
+ "lazmall",
718
+ "百度开放平台",
719
+ "兴业消金",
720
+ " 珍爱网",
721
+ "京东云",
722
+ "小红书",
723
+ "1688. com",
724
+ "如视智数",
725
+ "missfresh",
726
+ "pazl.pingan.cn",
727
+ "平安集团",
728
+ "kugou",
729
+ "懂车帝",
730
+ "斑马智行",
731
+ "浪潮集团",
732
+ "netease hangzhou network",
733
+ "pagd.net",
734
+ "探探",
735
+ "chinaliterature",
736
+ "amazon亚马逊",
737
+ "alphabet",
738
+ "当当文创手工艺品电子商务",
739
+ "五八邦",
740
+ "shenzhen zhenai network information",
741
+ "lingshoutong",
742
+ "字节",
743
+ "lvmama",
744
+ "金山办公",
745
+ "众安保险",
746
+ "时装信息",
747
+ "优视科技",
748
+ "guangzhou kugou",
749
+ "ibm",
750
+ "滴滴打车",
751
+ "beijing sogou information service",
752
+ "megvii",
753
+ "健谈哥",
754
+ "cloudwalk group",
755
+ "蜂联科技",
756
+ "冬云",
757
+ "京东尚科",
758
+ "钢琴块2",
759
+ "京东世纪",
760
+ "商汤",
761
+ "众鸣世纪",
762
+ "腾讯音乐",
763
+ "迅雷网文化",
764
+ "华为云计算技术",
765
+ "live.me",
766
+ "全球速卖通",
767
+ "快的打车",
768
+ "hello group inc",
769
+ "美丽说",
770
+ "suning",
771
+ "opengauss",
772
+ "lazada",
773
+ "tmall",
774
+ "acfun",
775
+ "当当网",
776
+ "中银",
777
+ "旷视科技",
778
+ "百度钱包",
779
+ "淘宝网",
780
+ "新浪微博",
781
+ "迅雷集团",
782
+ "中信消费金融",
783
+ "学而思教育",
784
+ "平安普惠",
785
+ "悟空跨境",
786
+ "irobotbox",
787
+ "平安产险",
788
+ "inspur group",
789
+ "世纪卓越快递服务",
790
+ "奇虎360",
791
+ "webank",
792
+ "偶藻",
793
+ "唯品支付",
794
+ "腾讯云计算",
795
+ "众安服务",
796
+ "亿之唐",
797
+ "beijing 58 information ttechnology",
798
+ "平安好医生",
799
+ "迅雷之锤",
800
+ "旅行小账本",
801
+ "芒果游戏",
802
+ "新浪传媒",
803
+ "旷镜博煊",
804
+ "全民k歌",
805
+ "滴滴支付",
806
+ "北京网心科技",
807
+ "挂号网",
808
+ "萤石",
809
+ "chinavision media group limited",
810
+ "猎豹安全大师",
811
+ "cmcm",
812
+ "趣店",
813
+ "蚂蚁财富",
814
+ "商汤科技",
815
+ "甲骨文",
816
+ "百度云",
817
+ "百度apollo",
818
+ "19 pay",
819
+ "stock.pingan.com",
820
+ "tiktok",
821
+ "alibaba pictures group limited",
822
+ "ele",
823
+ "考拉",
824
+ "天猫",
825
+ "腾讯优图",
826
+ "起点中文网",
827
+ "百度视频",
828
+ "shanghai bili bili",
829
+ "京东物流",
830
+ "ebay marketplaces gmbh",
831
+ "alibaba sport",
832
+ "wish",
833
+ "阿里巴巴中国",
834
+ "中国银联",
835
+ "alibaba china network",
836
+ "china ping an property insurance",
837
+ "百度糯米网",
838
+ "微软中国",
839
+ "一九付",
840
+ "4 paradigm",
841
+ "叮咚买菜",
842
+ "umeng",
843
+ "众鸣科技",
844
+ "平安财富通",
845
+ "google",
846
+ "巨量引擎",
847
+ "百度贴吧",
848
+ "beijing jingdong century information",
849
+ "讯飞",
850
+ "beijing yunshan information",
851
+ "满运软件",
852
+ "中邮消费金融",
853
+ "饿了么",
854
+ "alios",
855
+ "腾讯ai实验室",
856
+ "第四范式智能",
857
+ "瀚星创业投资",
858
+ "gradient ventures",
859
+ "microsoft",
860
+ "哈啰共享汽车",
861
+ "乞力电子商务",
862
+ "mscf",
863
+ "网易影业文化",
864
+ "铁友旅游咨询",
865
+ "kilimall",
866
+ "云企互联投资",
867
+ "ping an financial consulting",
868
+ "beijng jingdong century commerce",
869
+ "高德威智能交通系统",
870
+ "中友信息",
871
+ "平安医疗健康管理",
872
+ "eciticcfc",
873
+ "中信证券",
874
+ "fliggy",
875
+ "电子湾",
876
+ "旷云金智",
877
+ "微粒贷",
878
+ "rsi",
879
+ "滴滴云计算",
880
+ "google ventures",
881
+ "箐程",
882
+ "每日优鲜",
883
+ "音兔",
884
+ "拉扎斯",
885
+ "今日头条",
886
+ "乐信控股",
887
+ "猎豹浏览器",
888
+ "细微咨询",
889
+ "好未来",
890
+ "我乐",
891
+ "绘声绘色",
892
+ "抖音",
893
+ "搜狐新时代",
894
+ "飞猪",
895
+ "鹅厂",
896
+ "贝壳找房",
897
+ "tuniu",
898
+ "红马传媒文化",
899
+ "钉钉",
900
+ "马上消费金融",
901
+ "360手机",
902
+ "平安医保",
903
+ "快途",
904
+ "alibaba",
905
+ "小哈换电",
906
+ "大麦",
907
+ "恒睿人工智能研究院",
908
+ "谷歌资本",
909
+ "猎豹",
910
+ "穆迪信息"
911
+ ]
deepdoc/parser/resume/entities/res/good_sch.json ADDED
@@ -0,0 +1,595 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "中国科技大学",
3
+ "国防科学技术大学",
4
+ "清华大学",
5
+ "清华",
6
+ "tsinghua university",
7
+ "thu",
8
+ "北京大学",
9
+ "北大",
10
+ "beijing university",
11
+ "pku",
12
+ "中国科学技术大学",
13
+ "中国科大",
14
+ "中科大",
15
+ "china science & technology university",
16
+ "ustc",
17
+ "复旦大学",
18
+ "复旦",
19
+ "fudan university",
20
+ "fdu",
21
+ "中国人民大学",
22
+ "人大",
23
+ "人民大学",
24
+ "renmin university of china",
25
+ "ruc",
26
+ "上海交通大学",
27
+ "上海交大",
28
+ "shanghai jiao tong university",
29
+ "sjtu",
30
+ "南京大学",
31
+ "南大",
32
+ "nanjing university",
33
+ "nju",
34
+ "同济大学",
35
+ "同济",
36
+ "tongji university",
37
+ "tongji",
38
+ "浙江大学",
39
+ "浙大",
40
+ "zhejiang university",
41
+ "zju",
42
+ "南开大学",
43
+ "南开",
44
+ "nankai university",
45
+ "nku",
46
+ "北京航空航天大学",
47
+ "北航",
48
+ "beihang university",
49
+ "buaa",
50
+ "北京师范大学",
51
+ "北师",
52
+ "北师大",
53
+ "beijing normal university",
54
+ "bnu",
55
+ "武汉大学",
56
+ "武大",
57
+ "wuhan university",
58
+ "whu",
59
+ "西安交通大学",
60
+ "西安交大",
61
+ "xi’an jiaotong university",
62
+ "xjtu",
63
+ "天津大学",
64
+ "天大",
65
+ "university of tianjin",
66
+ "tju",
67
+ "华中科技大学",
68
+ "华中大",
69
+ "central china university science and technology",
70
+ "hust",
71
+ "北京理工大学",
72
+ "北理",
73
+ "beijing institute of technology",
74
+ "bit",
75
+ "东南大学",
76
+ "东大",
77
+ "southeast china university",
78
+ "seu",
79
+ "中山大学",
80
+ "中大",
81
+ "zhongshan university",
82
+ "sysu",
83
+ "华东师范大学",
84
+ "华师大",
85
+ "east china normal university",
86
+ "ecnu",
87
+ "哈尔滨工业大学",
88
+ "哈工大",
89
+ "harbin institute of technology",
90
+ "hit",
91
+ "厦门大学",
92
+ "厦大",
93
+ "xiamen university",
94
+ "xmu",
95
+ "西北工业大学",
96
+ "西工大",
97
+ "西北工大",
98
+ "northwestern polytechnical university",
99
+ "npu",
100
+ "中南大学",
101
+ "中南",
102
+ "middle and southern university",
103
+ "csu",
104
+ "大连理工大学",
105
+ "大工",
106
+ "institute of technology of dalian",
107
+ "dut",
108
+ "四川大学",
109
+ "川大",
110
+ "sichuan university",
111
+ "scu",
112
+ "电子科技大学",
113
+ "电子科大",
114
+ "university of electronic science and technology of china",
115
+ "uestc",
116
+ "华南理工大学",
117
+ "华南理工",
118
+ "institutes of technology of south china",
119
+ "scut",
120
+ "吉林大学",
121
+ "吉大",
122
+ "jilin university",
123
+ "jlu",
124
+ "湖南大学",
125
+ "湖大",
126
+ "hunan university",
127
+ "hnu",
128
+ "重庆大学",
129
+ "重大",
130
+ "university of chongqing",
131
+ "cqu",
132
+ "山东大学",
133
+ "山大",
134
+ "shandong university",
135
+ "sdu",
136
+ "中国农业大学",
137
+ "中国农大",
138
+ "china agricultural university",
139
+ "cau",
140
+ "中国海洋大学",
141
+ "中国海大",
142
+ "chinese marine university",
143
+ "ouc",
144
+ "中央民族大学",
145
+ "中央民大",
146
+ "central university for nationalities",
147
+ "muc",
148
+ "东北大学",
149
+ "东北工学院",
150
+ "northeastern university",
151
+ "neu 或 nu",
152
+ "兰州大学",
153
+ "兰大",
154
+ "lanzhou university",
155
+ "lzu",
156
+ "西北农林科技大学",
157
+ "西农","西北农大",
158
+ "northwest a&f university",
159
+ "nwafu",
160
+ "中国人民解放军国防科技大学",
161
+ "国防科技大学","国防科大",
162
+ "national university of defense technology",
163
+ "nudt",
164
+ "郑州大学",
165
+ "郑大",
166
+ "zhengzhou university",
167
+ "zzu",
168
+ "云南大学",
169
+ "云大",
170
+ "yunnan university",
171
+ "ynu",
172
+ "新疆大学",
173
+ "新大",
174
+ "xinjiang university",
175
+ "xju",
176
+ "北京交通大学",
177
+ "北京交大",
178
+ "beijing jiaotong university",
179
+ "bjtu",
180
+ "北京工业大学",
181
+ "北工大",
182
+ "beijing university of technology",
183
+ "bjut",
184
+ "北京科技大学",
185
+ "北科大","北京科大",
186
+ "university of science and technology beijing",
187
+ "ustb",
188
+ "北京化工大学",
189
+ "北化",
190
+ "beijing university of chemical technology",
191
+ "buct",
192
+ "北京邮电大学",
193
+ "北邮",
194
+ "beijing university of posts and telecommunications",
195
+ "beijing university of post and telecommunications",
196
+ "beijing university of post and telecommunication",
197
+ "beijing university of posts and telecommunication",
198
+ "bupt",
199
+ "北京林业大学",
200
+ "北林",
201
+ "beijing forestry university",
202
+ "bfu",
203
+ "北京协和医学院",
204
+ "协和医学院",
205
+ "peking union medical college",
206
+ "pumc",
207
+ "北京中医药大学",
208
+ "北中医",
209
+ "beijing university of chinese medicine",
210
+ "bucm",
211
+ "首都师范大学",
212
+ "首师大",
213
+ "capital normal university",
214
+ "cnu",
215
+ "北京外国语大学",
216
+ "北外",
217
+ "beijing foreign studies university",
218
+ "bfsu",
219
+ "中国传媒大学",
220
+ "中媒",
221
+ "中传",
222
+ "北京广播学院",
223
+ "communication university of china",
224
+ "cuc",
225
+ "中央财经大学",
226
+ "中央财大",
227
+ "中财大",
228
+ "the central university of finance and economics",
229
+ "cufe",
230
+ "对外经济贸易大学",
231
+ "对外经贸大学",
232
+ "贸大",
233
+ "university of international business and economics",
234
+ "uibe",
235
+ "外交学院",
236
+ "外院",
237
+ "china foreign affairs university",
238
+ "cfau",
239
+ "中国人民公安大学",
240
+ "公安大学",
241
+ "people's public security university of china",
242
+ "ppsuc",
243
+ "北京体育大学",
244
+ "北体大",
245
+ "beijing sport university",
246
+ "bsu",
247
+ "中央音乐学院",
248
+ "央音",
249
+ "中央院",
250
+ "central conservatory of music",
251
+ "ccom",
252
+ "中国音乐学院",
253
+ "国音",
254
+ "中国院",
255
+ "china conservatory of music",
256
+ "ccmusic",
257
+ "中央美术学院",
258
+ "央美",
259
+ "central academy of fine art",
260
+ "cafa",
261
+ "中央戏剧学院",
262
+ "中戏",
263
+ "the central academy of drama",
264
+ "tcad",
265
+ "中国政法大学",
266
+ "法大",
267
+ "china university of political science and law",
268
+ "zuc",
269
+ "cupl",
270
+ "中国科学院大学",
271
+ "国科大",
272
+ "科院大",
273
+ "university of chinese academy of sciences",
274
+ "ucas",
275
+ "福州大学",
276
+ "福大",
277
+ "university of fuzhou",
278
+ "fzu",
279
+ "暨南大学",
280
+ "暨大",
281
+ "ji'nan university",
282
+ "jnu",
283
+ "广州中医药大学",
284
+ "广中医",
285
+ "traditional chinese medicine university of guangzhou",
286
+ "gucm",
287
+ "华南师范大学",
288
+ "华南师大",
289
+ "south china normal university",
290
+ "scnu",
291
+ "广西大学",
292
+ "西大",
293
+ "guangxi university",
294
+ "gxu",
295
+ "贵州大学",
296
+ "贵大",
297
+ "guizhou university",
298
+ "gzu",
299
+ "海南大学",
300
+ "海大",
301
+ "university of hainan",
302
+ "hainu",
303
+ "河南大学",
304
+ "河大",
305
+ "he'nan university",
306
+ "henu",
307
+ "哈尔滨工程大学",
308
+ "哈工程",
309
+ "harbin engineering university",
310
+ "heu",
311
+ "东北农业大学",
312
+ "东北农大",
313
+ "northeast agricultural university",
314
+ "neau",
315
+ "东北林业大学",
316
+ "东北林大",
317
+ "northeast forestry university",
318
+ "nefu",
319
+ "中国地质大学",
320
+ "地大",
321
+ "china university of geosciences",
322
+ "cug",
323
+ "武汉理工大学",
324
+ "武汉理工",
325
+ "wuhan university of technology",
326
+ "wut",
327
+ "华中农业大学",
328
+ "华中农大",
329
+ "华农",
330
+ "central china agricultural university",
331
+ "hzau",
332
+ "华中师范大学",
333
+ "华中师大",
334
+ "华大",
335
+ "central china normal university",
336
+ "ccnu",
337
+ "中南财经政法大学",
338
+ "中南大",
339
+ "zhongnan university of economics & law",
340
+ "zuel",
341
+ "湖南师范大学",
342
+ "湖南师大",
343
+ "hunan normal university",
344
+ "hunnu",
345
+ "延边大学",
346
+ "延大",
347
+ "yanbian university",
348
+ "ybu",
349
+ "东北师范大学",
350
+ "东北师大",
351
+ "northeast normal university",
352
+ "nenu",
353
+ "苏州大学",
354
+ "苏大",
355
+ "soochow university",
356
+ "suda",
357
+ "南京航空航天大学",
358
+ "南航",
359
+ "nanjing aero-space university",
360
+ "nuaa",
361
+ "南京理工大学",
362
+ "南理工",
363
+ "institutes of technology of nanjing",
364
+ "njust",
365
+ "中国矿业大学",
366
+ "中国矿大",
367
+ "china mining university",
368
+ "cumt",
369
+ "南京邮电大学",
370
+ "南邮",
371
+ "nanjing university of posts and telecommunications",
372
+ "njupt",
373
+ "河海大学",
374
+ "河海",
375
+ "river sea university",
376
+ "hhu",
377
+ "江南大学",
378
+ "江南大",
379
+ "jiangnan university",
380
+ "jiangnan",
381
+ "南京林业大学",
382
+ "南林",
383
+ "nanjing forestry university",
384
+ "njfu",
385
+ "南京信息工程大学",
386
+ "南信大",
387
+ "nanjing university of information science and technology",
388
+ "nuist",
389
+ "南京农业大学",
390
+ "南农",
391
+ "南农大",
392
+ "南京农大",
393
+ "agricultural university of nanjing",
394
+ "njau",
395
+ "nau",
396
+ "南京中医药大学",
397
+ "南中医",
398
+ "nanjing university of chinese medicine",
399
+ "njucm",
400
+ "中国药科大学",
401
+ "中国药大",
402
+ "china medicine university",
403
+ "cpu",
404
+ "南京师范大学",
405
+ "南京师大",
406
+ "南师大",
407
+ "南师",
408
+ "nanjing normal university",
409
+ "nnu",
410
+ "南昌大学",
411
+ "昌大",
412
+ "university of nanchang","nanchang university",
413
+ "ncu",
414
+ "辽宁大学",
415
+ "辽大",
416
+ "liaoning university",
417
+ "lnu",
418
+ "大连海事大学",
419
+ "大连海大",
420
+ "海大",
421
+ "maritime affairs university of dalian",
422
+ "dmu",
423
+ "内蒙古大学",
424
+ "内大",
425
+ "university of the inner mongol","inner mongolia university",
426
+ "imu",
427
+ "宁夏大学",
428
+ "宁大",
429
+ "ningxia university",
430
+ "nxu",
431
+ "青海大学",
432
+ "清大",
433
+ "qinghai university",
434
+ "qhu",
435
+ "中国石油大学",
436
+ "中石大",
437
+ "china university of petroleum beijing",
438
+ "upc",
439
+ "太原理工大学",
440
+ "太原理工",
441
+ "institutes of technology of taiyuan","taiyuan university of technology",
442
+ "tyut",
443
+ "西北大学",
444
+ "西大",
445
+ "northwest university",
446
+ "nwu",
447
+ "西安电子科技大学",
448
+ "西电",
449
+ "xidian university",
450
+ "xdu",
451
+ "长安大学",
452
+ "长大",
453
+ "chang`an university",
454
+ "chu",
455
+ "陕西师范大学",
456
+ "陕西师大",
457
+ "陕师大",
458
+ "shaanxi normal university",
459
+ "snnu",
460
+ "第四军医大学",
461
+ "空军军医大学","四医大",
462
+ "air force medical university",
463
+ "fmmu",
464
+ "华东理工大学",
465
+ "华理",
466
+ "east china university of science",
467
+ "ecust",
468
+ "东华大学",
469
+ "东华",
470
+ "donghua university",
471
+ "dhu",
472
+ "上海海洋大学",
473
+ "上海海大",
474
+ "shanghai ocean university",
475
+ "shou",
476
+ "上海中医药大学",
477
+ "上中医",
478
+ "shanghai university of traditional chinese medicine",
479
+ "shutcm",
480
+ "上海外国语大学",
481
+ "上外",
482
+ "shanghai international studies university",
483
+ "sisu",
484
+ "上海财经大学",
485
+ "上海财大",
486
+ "上财",
487
+ "shanghai university of finance",
488
+ "sufe",
489
+ "上海体育学院",
490
+ "shanghai university of sport",
491
+ "上海音乐学院",
492
+ "上音",
493
+ "shanghai conservatory of music",
494
+ "shcm",
495
+ "上海大学",
496
+ "上大",
497
+ "shanghai university",
498
+ "第二军医大学",
499
+ "海军军医大学",
500
+ "naval medical university",
501
+ "西南交通大学",
502
+ "西南交大",
503
+ "southwest jiaotong university",
504
+ "swjtu",
505
+ "西南石油大学",
506
+ "西南石大",
507
+ "southwest petroleum university",
508
+ "swpu",
509
+ "成都理工大学",
510
+ "成都理工",
511
+ "chengdu university of technology",
512
+ "cdut ",
513
+ "四川农业大学",
514
+ "川农",
515
+ "川农大",
516
+ "sichuan agricultural university",
517
+ "sicau",
518
+ "成都中医药大学",
519
+ "成中医",
520
+ "chengdu university of tcm",
521
+ "cdutcm",
522
+ "西南财经大学",
523
+ "西南财大",
524
+ "西财",
525
+ "southwestern university of finance and economics",
526
+ "swufe",
527
+ "天津工业大学",
528
+ "天工大",
529
+ "tianjin university of technology",
530
+ "tgu",
531
+ "天津医科大学",
532
+ "天津医大",
533
+ "medical university of tianjin",
534
+ "tmu",
535
+ "天津中医药大学",
536
+ "天中",
537
+ "tianjin university of traditional chinese medicine",
538
+ "tutcm",
539
+ "华北电力大学",
540
+ "华电",
541
+ "north china electric power university",
542
+ "ncepu",
543
+ "河北工业大学",
544
+ "河工大",
545
+ "hebei university of technology",
546
+ "hebut",
547
+ "西藏大学",
548
+ "藏大",
549
+ "tibet university",
550
+ "tu",
551
+ "石河子大学",
552
+ "石大",
553
+ "shihezi university",
554
+ "中国美术学院",
555
+ "中国美院",
556
+ "国美",
557
+ "china academy of art",
558
+ "caa",
559
+ "宁波大学",
560
+ "宁大",
561
+ "ningbo university",
562
+ "nbu",
563
+ "西南大学",
564
+ "西大",
565
+ "southwest university",
566
+ "swu",
567
+ "安徽大学",
568
+ "安大",
569
+ "university of anhui",
570
+ "ahu",
571
+ "合肥工业大学",
572
+ "合肥工大",
573
+ "合工大",
574
+ "hefei university of technology",
575
+ "hfut",
576
+ "中国地质大学",
577
+ "地大",
578
+ "china university of geosciences",
579
+ "cug",
580
+ "中国地质大学",
581
+ "地大",
582
+ "北京地大",
583
+ "cugb",
584
+ "中国矿业大学",
585
+ "中国矿大",
586
+ "china university of mining & technology",
587
+ "cumtb",
588
+ "中国石油大学",
589
+ "中石大",
590
+ "石大",
591
+ "china university of petroleum",
592
+ "cup",
593
+ "中国石油大学",
594
+ "中石大",
595
+ "cup"]
deepdoc/parser/resume/entities/res/school.rank.csv ADDED
@@ -0,0 +1,1627 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 清华大学,2,985,清华
2
+ 清华大学,2,985,Tsinghua University
3
+ 清华大学,2,985,THU
4
+ 北京大学,1,985,北大
5
+ 北京大学,1,985,Beijing University
6
+ 北京大学,1,985,PKU
7
+ 中国科学技术大学,14,985,中国科大
8
+ 中国科学技术大学,14,985,中科大
9
+ 中国科学技术大学,14,985,China Science & Technology University
10
+ 中国科学技术大学,14,985,USTC
11
+ 复旦大学,5,985,复旦
12
+ 复旦大学,5,985,Fudan University
13
+ 复旦大学,5,985,FDU
14
+ 中国人民大学,15,985,人大
15
+ 中国人民大学,15,985,人民大学
16
+ 中国人民大学,15,985,Renmin University Of China
17
+ 中国人民大学,15,985,RUC
18
+ 上海交通大学,4,985,上海交大
19
+ 上海交通大学,4,985,Shanghai Jiao Tong University
20
+ 上海交通大学,4,985,SJTU
21
+ 南京大学,11,985,南大
22
+ 南京大学,11,985,Nanjing University
23
+ 南京大学,11,985,NJU
24
+ 同济大学,17,985,同济
25
+ 同济大学,17,985,Tongji University
26
+ 同济大学,17,985,Tongji
27
+ 浙江大学,3,985,浙大
28
+ 浙江大学,3,985,Zhejiang University
29
+ 浙江大学,3,985,ZJU
30
+ 南开大学,27,985,南开
31
+ 南开大学,27,985,Nankai University
32
+ 南开大学,27,985,NKU
33
+ 北京航空航天大学,21,985,北航
34
+ 北京航空航天大学,21,985,Beihang University
35
+ 北京航空航天大学,21,985,BUAA
36
+ 北京师范大学,23,985,北师
37
+ 北京师范大学,23,985,北师大
38
+ 北京师范大学,23,985,Beijing Normal University
39
+ 北京师范大学,23,985,BNU
40
+ 武汉大学,6,985,武大
41
+ 武汉大学,6,985,Wuhan University
42
+ 武汉大学,6,985,WHU
43
+ 西安交通大学,13,985,西安交大
44
+ 西安交通大学,13,985,Xi’an Jiaotong University
45
+ 西安交通大学,13,985,XJTU
46
+ 天津大学,22,985,天大
47
+ 天津大学,22,985,University Of Tianjin
48
+ 天津大学,22,985,TJU
49
+ 华中科技大学,8,985,华中大
50
+ 华中科技大学,8,985,Central China University Science and Technology
51
+ 华中科技大学,8,985,HUST
52
+ 北京理工大学,24,985,北理
53
+ 北京理工大学,24,985,Beijing Institute of Technology
54
+ 北京理工大学,24,985,BIT
55
+ 东南大学,20,985,东大
56
+ 东南大学,20,985,Southeast China University
57
+ 东南大学,20,985,SEU
58
+ 中山大学,9,985,中大
59
+ 中山大学,9,985,Zhongshan University
60
+ 中山大学,9,985,SYSU
61
+ 华东师范大学,30,985,华师大
62
+ 华东师范大学,30,985,East China Normal University
63
+ 华东师范大学,30,985,ECNU
64
+ 哈尔滨工业大学,10,985,哈工大
65
+ 哈尔滨工业大学,10,985,Harbin Institute Of Technology
66
+ 哈尔滨工业大学,10,985,HIT
67
+ 厦门大学,28,985,厦大
68
+ 厦门大学,28,985,Xiamen University
69
+ 厦门大学,28,985,XMU
70
+ 西北工业大学,31,985,西工大
71
+ 西北工业大学,31,985,西北工大
72
+ 西北工业大学,31,985,Northwestern Polytechnical University
73
+ 西北工业大学,31,985,NPU
74
+ 中南大学,19,985,中南
75
+ 中南大学,19,985,Middle and Southern University
76
+ 中南大学,19,985,CSU
77
+ 大连理工大学,26,985,大工
78
+ 大连理工大学,26,985,Institute Of Technology Of Dalian
79
+ 大连理工大学,26,985,DUT
80
+ 四川大学,7,985,川大
81
+ 四川大学,7,985,Sichuan University
82
+ 四川大学,7,985,SCU
83
+ 电子科技大学,32,985,电子科大
84
+ 电子科技大学,32,985,University of Electronic Science and Technology of China
85
+ 电子科技大学,32,985,UESTC
86
+ 华南理工大学,25,985,华南理工
87
+ 华南理工大学,25,985,Institutes Of Technology Of South China
88
+ 华南理工大学,25,985,SCUT
89
+ 吉林大学,12,985,吉大
90
+ 吉林大学,12,985,Jilin University
91
+ 吉林大学,12,985,JLU
92
+ 湖南大学,38,985,湖大
93
+ 湖南大学,38,985,Hunan University
94
+ 湖南大学,38,985,HNU
95
+ 重庆大学,29,985,重大
96
+ 重庆大学,29,985,University Of Chongqing
97
+ 重庆大学,29,985,CQU
98
+ 山东大学,16,985,山大
99
+ 山东大学,16,985,Shandong University
100
+ 山东大学,16,985,SDU
101
+ 中国农业大学,37,985,中国农大
102
+ 中国农业大学,37,985,China Agricultural University
103
+ 中国农业大学,37,985,CAU
104
+ 中国海洋大学,60,985,中国海大
105
+ 中国海洋大学,60,985,Chinese Marine University
106
+ 中国海洋大学,60,985,OUC
107
+ 中央民族大学,106,985,中央民大
108
+ 中央民族大学,106,985,Central University For Nationalities
109
+ 中央民族大学,106,985,MUC
110
+ 东北大学,41,985,东北工学院
111
+ 东北大学,41,985,Northeastern University
112
+ 东北大学,41,985,NEU
113
+ 东北大学,41,985,NU
114
+ 兰州大学,39,985,兰大
115
+ 兰州大学,39,985,Lanzhou University
116
+ 兰州大学,39,985,LZU
117
+ 西北农林科技大学,78,985,西农、西北农大
118
+ 西北农林科技大学,78,985,Northwest A&F University
119
+ 西北农林科技大学,78,985,NWAFU
120
+ 中国人民解放军国防科技大学,89,985,国防科技大学、国防科大、国防科学技术大学
121
+ 中国人民解放军国防科技大学,89,985,National University of Defense Technology
122
+ 中国人民解放军国防科技大学,89,985,NUDT
123
+ 郑州大学,34,211,郑大
124
+ 郑州大学,34,211,Zhengzhou University
125
+ 郑州大学,34,211,ZZU
126
+ 云南大学,75,211,云大
127
+ 云南大学,75,211,Yunnan University
128
+ 云南大学,75,211,YNU
129
+ 新疆大学,114,211,新大
130
+ 新疆大学,114,211,Xinjiang University
131
+ 新疆大学,114,211,XJU
132
+ 北京交通大学,49,211,北京交大
133
+ 北京交通大学,49,211,Beijing Jiaotong University
134
+ 北京交通大学,49,211,BJTU
135
+ 北京工业大学,63,211,北工大
136
+ 北京工业大学,63,211,Beijing University Of Technology
137
+ 北京工业大学,63,211,BJUT
138
+ 北京科技大学,48,211,北科大、北京科大
139
+ 北京科技大学,48,211,University Of Science and Technology Beijing
140
+ 北京科技大学,48,211,USTB
141
+ 北京化工大学,82,211,北化
142
+ 北京化工大学,82,211,Beijing University of Chemical Technology
143
+ 北京化工大学,82,211,BUCT
144
+ 北京邮电大学,76,211,北邮
145
+ 北京邮电大学,76,211,Beijing University Of Posts and Telecommunications
146
+ 北京邮电大学,76,211,BUPT
147
+ 北京林业大学,88,211,北林
148
+ 北京林业大学,88,211,Beijing Forestry University
149
+ 北京林业大学,88,211,BFU
150
+ 北京协和医学院,,双一流,协和医学院
151
+ 北京协和医学院,,双一流,Peking Union Medical College
152
+ 北京协和医学院,,双一流,PUMC
153
+ 北京中医药大学,121,211,北中医
154
+ 北京中医药大学,121,211,Beijing University Of Chinese Medicine
155
+ 北京中医药大学,121,211,BUCM
156
+ 首都师范大学,,双一流,首师大
157
+ 首都师范大学,,双一流,Capital Normal University
158
+ 首都师范大学,,双一流,CNU
159
+ 北京外国语大学,124,211,北外
160
+ 北京外国语大学,124,211,Beijing Foreign Studies University
161
+ 北京外国语大学,124,211,BFSU
162
+ 中国传媒大学,96,211,中媒
163
+ 中国传媒大学,96,211,中传
164
+ 中国传媒大学,96,211,北京广播学院
165
+ 中国传媒大学,96,211,Communication University Of China
166
+ 中国传媒大学,96,211,CUC
167
+ 中央财经大学,79,211,中央财大
168
+ 中央财经大学,79,211,中财大
169
+ 中央财经大学,79,211,The Central University Of Finance and Economics
170
+ 中央财经大学,79,211,CUFE
171
+ 对外经济贸易大学,99,211,对外经贸大学
172
+ 对外经济贸易大学,99,211,贸大
173
+ 对外经济贸易大学,99,211,University Of International Business and Economics
174
+ 对外经济贸易大学,99,211,UIBE
175
+ 外交学院,,双一流,外院
176
+ 外交学院,,双一流,China Foreign Affairs University
177
+ 外交学院,,双一流,CFAU
178
+ 中国人民公安大学,,双一流,公安大学
179
+ 中国人民公安大学,,双一流,People's Public Security University of China
180
+ 中国人民公安大学,,双一流,PPSUC
181
+ 北京体育大学,122,211,北体大
182
+ 北京体育大学,122,211,Beijing Sport University
183
+ 北京体育大学,122,211,BSU
184
+ 中央音乐学院,103,211,央音
185
+ 中央音乐学院,103,211,中央院
186
+ 中央音乐学院,103,211,Central Conservatory Of Music
187
+ 中央音乐学院,103,211,CCOM
188
+ 中国音乐学院,,双一流,国音
189
+ 中国音乐学院,,双一流,中国院
190
+ 中国音乐学院,,双一流,China Conservatory of Music
191
+ 中国音乐学院,,双一流,CCMUSIC
192
+ 中央美术学院,104,双一流,央美
193
+ 中央美术学院,104,双一流,Central Academy of Fine Art
194
+ 中央美术学院,104,双一流,CAFA
195
+ 中央戏剧学院,133,双一流,中戏
196
+ 中央戏剧学院,133,双一流,The Central Academy Of Drama
197
+ 中央戏剧学院,133,双一流,TCAD
198
+ 中国政法大学,90,211,法大
199
+ 中国政法大学,90,211,China University Of Political Science and Law
200
+ 中国政法大学,90,211,ZUC
201
+ 中国政法大学,90,211,CUPL
202
+ 中国科学院大学,18,双一流,国科大
203
+ 中国科学院大学,18,双一流,科院大
204
+ 中国科学院大学,18,双一流,University of Chinese Academy of Sciences
205
+ 中国科学院大学,18,双一流,UCAS
206
+ 福州大学,72,211,福大
207
+ 福州大学,72,211,University Of Fuzhou
208
+ 福州大学,72,211,FZU
209
+ 暨南大学,44,211,暨大
210
+ 暨南大学,44,211,Ji'nan University
211
+ 暨南大学,44,211,JNU
212
+ 广州中医药大学,,双一流,广中医
213
+ 广州中医药大学,,双一流,Traditional Chinese Medicine University Of Guangzhou
214
+ 广州中医药大学,,双一流,GUCM
215
+ 华南师范大学,55,211,华南师大
216
+ 华南师范大学,55,211,South China Normal University
217
+ 华南师范大学,55,211,SCNU
218
+ 广西大学,71,211,广西大
219
+ 广西大学,71,211,Guangxi University
220
+ 广西大学,71,211,GXU
221
+ 贵州大学,94,211,贵大
222
+ 贵州大学,94,211,Guizhou University
223
+ 贵州大学,94,211,GZU
224
+ 海南大学,101,211,海大
225
+ 海南大学,101,211,University Of Hainan
226
+ 海南大学,101,211,HAINU
227
+ 河南大学,85,双一流,河大
228
+ 河南大学,85,双一流,He'nan University
229
+ 河南大学,85,双一流,HENU
230
+ 哈尔滨工程大学,65,211,哈工程
231
+ 哈尔滨工程大学,65,211,Harbin Engineering University
232
+ 哈尔滨工程大学,65,211,HEU
233
+ 东北农业大学,98,211,东北农大
234
+ 东北农业大学,98,211,Northeast Agricultural University
235
+ 东北农业大学,98,211,NEAU
236
+ 东北林业大学,93,211,东北林大
237
+ 东北林业大学,93,211,Northeast Forestry University
238
+ 东北林业大学,93,211,NEFU
239
+ 中国地质大学,80,211,地大
240
+ 中国地质大学,80,211,China University Of Geosciences
241
+ 中国地质大学,80,211,CUG
242
+ 武汉理工大学,40,211,武汉理工
243
+ 武汉理工大学,40,211,Wuhan University of Technology
244
+ 武汉理工大学,40,211,WUT
245
+ 华中农业大学,52,211,华中农大
246
+ 华中农业大学,52,211,华农
247
+ 华中农业大学,52,211,Central China Agricultural University
248
+ 华中农业大学,52,211,HZAU
249
+ 华中师范大学,58,211,华中师大
250
+ 华中师范大学,58,211,华大
251
+ 华中师范大学,58,211,Central China Normal University
252
+ 华中师范大学,58,211,CCNU
253
+ 中南财经政法大学,105,211,中南大
254
+ 中南财经政法大学,105,211,Zhongnan University Of Economics & Law
255
+ 中南财经政法大学,105,211,ZUEL
256
+ 湖南师范大学,68,211,湖南师大
257
+ 湖南师范大学,68,211,Hunan Normal University
258
+ 湖南师范大学,68,211,HUNNU
259
+ 延边大学,130,211,延大
260
+ 延边大学,130,211,Yanbian University
261
+ 延边大学,130,211,YBU
262
+ 东北师范大学,69,211,东北师大
263
+ 东北师范大学,69,211,Northeast Normal University
264
+ 东北师范大学,69,211,NENU
265
+ 苏州大学,35,211,苏大
266
+ 苏州大学,35,211,Soochow University
267
+ 苏州大学,35,211,SUDA
268
+ 南京航空航天大学,33,211,南航
269
+ 南京航空航天大学,33,211,Nanjing Aero-Space University
270
+ 南京航空航天大学,33,211,NUAA
271
+ 南京理工大学,42,211,南理工
272
+ 南京理工大学,42,211,Institutes Of Technology Of Nanjing
273
+ 南京理工大学,42,211,NJUST
274
+ 中国矿业大学,61,211,中国矿大
275
+ 中国矿业大学,61,211,China Mining University
276
+ 中国矿业大学,61,211,CUMT
277
+ 南京邮电大学,,双一流,南邮
278
+ 南京邮电大学,,双一流,Nanjing University of Posts and Telecommunications
279
+ 南京邮电大学,,双一流,NJUPT
280
+ 河海大学,54,211,河海
281
+ 河海大学,54,211,River Sea University
282
+ 河海大学,54,211,HHU
283
+ 江南大学,57,211,江南大
284
+ 江南大学,57,211,Jiangnan University
285
+ 江南大学,57,211,Jiangnan
286
+ 南京林业大学,,双一流,南林
287
+ 南京林业大学,,双一流,Nanjing Forestry University
288
+ 南京林业大学,,双一流,NJFU
289
+ 南京信息工程大学,91,双一流,南信大
290
+ 南京信息工程大学,91,双一流,Nanjing University of Information Science and Technology
291
+ 南京信息工程大学,91,双一流,NUIST
292
+ 南京农业大学,53,211,南农
293
+ 南京农业大学,53,211,南农大
294
+ 南京农业大学,53,211,南京农大
295
+ 南京农业大学,53,211,Agricultural University Of Nanjing
296
+ 南京农业大学,53,211,NJAU
297
+ 南京农业大学,53,211,NAU
298
+ 南京中医药大学,,双一流,南中医
299
+ 南京中医药大学,,双一流,Nanjing University Of Chinese Medicine
300
+ 南京中医药大学,,双一流,NJUCM
301
+ 中国药科大学,119,211,中国药大
302
+ 中国药科大学,119,211,China Medicine University
303
+ 中国药科大学,119,211,CPU
304
+ 南京师范大学,56,211,南京师大
305
+ 南京师范大学,56,211,南师大
306
+ 南京师范大学,56,211,南师
307
+ 南京师范大学,56,211,Nanjing Normal University
308
+ 南京师范大学,56,211,NNU
309
+ 南昌大学,47,211,昌大
310
+ 南昌大学,47,211,University Of Nanchang、Nanchang University
311
+ 南昌大学,47,211,NCU
312
+ 辽宁大学,118,211,辽大
313
+ 辽宁大学,118,211,Liaoning University
314
+ 辽宁大学,118,211,LNU
315
+ 大连海事大学,111,211,大连海大
316
+ 大连海事大学,111,211,Maritime Affairs University Of Dalian
317
+ 大连海事大学,111,211,DMU
318
+ 内蒙古大学,116,211,内大
319
+ 内蒙古大学,116,211,University Of The Inner Mongol、Inner Mongolia University
320
+ 内蒙古大学,116,211,IMU
321
+ 宁夏大学,125,211,Ningxia University
322
+ 宁夏大学,125,211,NXU
323
+ 青海大学,129,211,青大
324
+ 青海大学,129,211,Qinghai University
325
+ 青海大学,129,211,QHU
326
+ 中国石油大学,77,双一流,中石大
327
+ 中国石油大学,77,双一流,China University Of Petroleum Beijing
328
+ 中国石油大学,77,双一流,UPC
329
+ 太原理工大学,84,211,太原理工
330
+ 太原理工大学,84,211,Institutes Of Technology Of Taiyuan、Taiyuan University of Technology
331
+ 太原理工大学,84,211,TYUT
332
+ 西北大学,59,211,西北大
333
+ 西北大学,59,211,Northwest University
334
+ 西北大学,59,211,NWU
335
+ 西安电子科技大学,50,211,西电
336
+ 西安电子科技大学,50,211,Xidian University
337
+ 西安电子科技大学,50,211,XDU
338
+ 长安大学,83,211,长大
339
+ 长安大学,83,211,Chang`an University
340
+ 长安大学,83,211,CHU
341
+ 陕西师范大学,67,211,陕西师大
342
+ 陕西师范大学,67,211,陕师大
343
+ 陕西师范大学,67,211,Shaanxi Normal University
344
+ 陕西师范大学,67,211,SNNU
345
+ 第四军医大学,,211,空军军医大学、四医大
346
+ 第四军医大学,,211,Air Force Medical University
347
+ 第四军医大学,,211,FMMU
348
+ 华东理工大学,45,211,华理
349
+ 华东理工大学,45,211,East China University Of Science
350
+ 华东理工大学,45,211,ECUST
351
+ 东华大学,74,211,东华
352
+ 东华大学,74,211,Donghua University
353
+ 东华大学,74,211,DHU
354
+ 上海海洋大学,,双一流,上海海大
355
+ 上海海洋大学,,双一流,Shanghai Ocean University
356
+ 上海海洋大学,,双一流,SHOU
357
+ 上海中医药大学,,211,上中医
358
+ 上海中医药大学,,211,Shanghai University of Traditional Chinese Medicine
359
+ 上海中医药大学,,211,SHUTCM
360
+ 上海外国语大学,123,211,上外
361
+ 上海外国语大学,123,211,Shanghai International Studies University
362
+ 上海外国语大学,123,211,SISU
363
+ 上海财经大学,95,211,上海财大
364
+ 上海财经大学,95,211,上财
365
+ 上海财经大学,95,211,Shanghai University Of Finance
366
+ 上海财经大学,95,211,SUFE
367
+ 上海体育学院,,双一流,Shanghai University Of Sport
368
+ 上海音乐学院,,双一流,上音
369
+ 上海音乐学院,,双一流,Shanghai Conservatory Of Music
370
+ 上海音乐学院,,双一流,SHCM
371
+ 上海大学,43,211,上大
372
+ 上海大学,43,211,Shanghai University
373
+ 第二军医大学,,211,海军军医大学
374
+ 第二军医大学,,211,Naval Medical University
375
+ 西南交通大学,36,211,西南交大
376
+ 西南交通大学,36,211,Southwest Jiaotong University
377
+ 西南交通大学,36,211,SWJTU
378
+ 西南石油大学,,双一流,西南石大
379
+ 西南石油大学,,双一流,Southwest Petroleum University
380
+ 西南石油大学,,双一流,SWPU
381
+ 成都理工大学,,双一流,成都理工
382
+ 成都理工大学,,双一流,Chengdu University Of Technology
383
+ 成都理工大学,,双一流,CDUT
384
+ 四川农业大学,113,211,川农
385
+ 四川农业大学,113,211,川农大
386
+ 四川农业大学,113,211,Sichuan Agricultural University
387
+ 四川农业大学,113,211,SICAU
388
+ 成都中医药大学,,双一流,成中医
389
+ 成都中医药大学,,双一流,Chengdu University of TCM
390
+ 成都中医药大学,,双一流,CDUTCM
391
+ 西南财经大学,97,211,西南财大
392
+ 西南财经大学,97,211,西财
393
+ 西南财经大学,97,211,Southwestern University Of Finance And Economics
394
+ 西南财经大学,97,211,SWUFE
395
+ 天津工业大学,,双一流,天工大
396
+ 天津工业大学,,双一流,Tianjin University of Technology
397
+ 天津工业大学,,双一流,TGU
398
+ 天津医科大学,107,211,天津医大
399
+ 天津医科大学,107,211,Medical University Of Tianjin
400
+ 天津医科大学,107,211,TMU
401
+ 天津中医药大学,,双一流,天中
402
+ 天津中医药大学,,双一流,Tianjin University of Traditional Chinese Medicine
403
+ 天津中医药大学,,双一流,TUTCM
404
+ 华北电力大学,73,211,华电
405
+ 华北电力大学,73,211,North China Electric Power University
406
+ 华北电力大学,73,211,NCEPU
407
+ 河北工业大学,92,211,河工大
408
+ 河北工业大学,92,211,Hebei University of Technology
409
+ 河北工业大学,92,211,HEBUT
410
+ 西藏大学,135,211,藏大
411
+ 西藏大学,135,211,Tibet University
412
+ 西藏大学,135,211,TU
413
+ 石河子大学,117,211,石大
414
+ 石河子大学,117,211,Shihezi University
415
+ 中国美术学院,,双一流,中国美院
416
+ 中国美术学院,,双一流,国美
417
+ 中国美术学院,,双一流,China Academy of Art
418
+ 中国美术学院,,双一流,CAA
419
+ 宁波大学,70,双一流,Ningbo University
420
+ 宁波大学,70,双一流,NBU
421
+ 西南大学,46,985,西南大
422
+ 西南大学,46,985,Southwest University
423
+ 西南大学,46,985,SWU
424
+ 安徽大学,81,211,安大
425
+ 安徽大学,81,211,University Of Anhui
426
+ 安徽大学,81,211,AHU
427
+ 合肥工业大学,51,211,合肥工大
428
+ 合肥工业大学,51,211,合工大
429
+ 合肥工业大学,51,211,HeFei University of Technology
430
+ 合肥工业大学,51,211,HFUT
431
+ 麻省理工学院,1,海外名校,Massachusetts Institute of Technology
432
+ 麻省理工学院,1,海外名校,MIT
433
+ 麻省理工学院,1,海外名校,麻省
434
+ 麻省理工学院,1,海外名校,马萨诸塞理工学院
435
+ 牛津大学,2,海外名校,University of Oxford
436
+ 牛津大学,2,海外名校,Oxford
437
+ 牛津大学,2,海外名校,Oxon
438
+ 牛津大学,2,海外名校,牛津
439
+ 斯坦福大学,3,海外名校,Stanford University
440
+ 斯坦福大学,3,海外名校,Leland Stanford Junior University
441
+ 斯坦福大学,3,海外名校,斯坦福
442
+ 斯坦福大学,3,海外名校,Stanford
443
+ 剑桥大学,3,海外名校,University of Cambridge
444
+ 剑桥大学,3,海外名校,剑桥
445
+ 哈佛大学,5,海外名校,Harvard University
446
+ 哈佛大学,5,海外名校,哈佛
447
+ 加州理工学院,6,海外名校,California Institute of Technology
448
+ 加州理工学院,6,海外名校,Caltech
449
+ 加州理工学院,6,海外名校,加州理工
450
+ 帝国理工学院,,海外名校,Imperial College London
451
+ 帝国理工学院,,海外名校,帝国理工医学院
452
+ 帝国理工学院,,海外名校,Imperial College of Science
453
+ 帝国理工学院,,海外名校,Technology and Medicine
454
+ 帝国理工学院,,海外名校,帝国学院
455
+ 帝国理工学院,,海外名校,帝国理工
456
+ 帝国理工学院,,海外名校,Imperial
457
+ 帝国理工学院,,海外名校,IC
458
+ 苏黎世联邦理工学院,8,海外名校,苏黎世理工
459
+ 苏黎世联邦理工学院,8,海外名校,ETH
460
+ 苏黎世联邦理工学院,8,海外名校,Swiss Federal Institute of Technology in Zurich
461
+ 伦敦大学学院,8,海外名校,UCL
462
+ 伦敦大学学院,8,海外名校,University College London
463
+ 芝加哥大学,10,海外名校,University of Chicago
464
+ 芝加哥大学,10,海外名校,芝大
465
+ 芝加哥大学,10,海外名校,UChicago
466
+ 新加坡国立大学,11,海外名校,National University of Singapore
467
+ 新加坡国立大学,11,海外名校,NUS
468
+ 南洋理工大学,12,海外名校,Nanyang Technological University
469
+ 南洋理工大学,12,海外名校,Singapore
470
+ 南洋理工大学,12,海外名校,NTU
471
+ 宾夕法尼亚大学,13,海外名校,University of Pennsylvania
472
+ 宾夕法尼亚大学,13,海外名校,UPenn
473
+ 宾夕法尼亚大学,13,海外名校,宾大
474
+ 洛桑联邦理工学院,14,海外名校,EPFL
475
+ 洛桑联邦理工学院,14,海外名校,Swiss federal Institute of Technology in Lausanne
476
+ 耶鲁大学,14,海外名校,Yale University
477
+ 耶鲁大学,14,海外名校,耶鲁
478
+ 耶鲁大学,14,海外名校,Yale
479
+ 爱丁堡大学,16,海外名校,The University of Edinburgh
480
+ 爱丁堡大学,16,海外名校,Edin
481
+ 爱丁堡大学,16,海外名校,爱大
482
+ 哥伦比亚大学,19,海外名校,Columbia University
483
+ 哥伦比亚大学,19,海外名校,哥大
484
+ 哥伦比亚大学,19,海外名校,Columbia University in the City of New York
485
+ 普林斯顿大学,20,海外名校,Princeton University
486
+ 普林斯顿大学,20,海外名校,普林斯顿
487
+ 康奈尔大学,21,海外名校,Cornell University
488
+ 康奈尔大学,21,海外名校,Cornell
489
+ 康奈尔大学,21,海外名校,康奈尔
490
+ 香港大学,22,海外名校,The University of Hong Kong
491
+ 香港大学,22,海外名校,HKU
492
+ 香港大学,22,海外名校,港大
493
+ 东京大学,23,海外名校,The University of Tokyo
494
+ 东京大学,23,海外名校,東京大学
495
+ 东京大学,23,海外名校,UTokyo
496
+ 密歇根大学安娜堡分校,,海外名校,University of Michigan-Ann Arbor
497
+ 密歇根大学安娜堡分校,,海外名校,UMich
498
+ 约翰霍普金斯大学,25,海外名校,Johns Hopkins University
499
+ 约翰霍普金斯大学,25,海外名校,JHU
500
+ 约翰霍普金斯大学,25,海外名校,Hopkins
501
+ 约翰霍普金斯大学,25,海外名校,霍普金斯大学
502
+ 多伦多大学,26,海外名校,University of Toronto
503
+ 多伦多大学,26,海外名校,UofT
504
+ 多伦多大学,26,海外名校,UToronto
505
+ 麦吉尔大学,27,海外名校,McGill University
506
+ 麦吉尔大学,27,海外名校,McGill
507
+ 澳洲国立大学,,海外名校,The Australian National University
508
+ 澳洲国立大学,,海外名校,ANU
509
+ 曼彻斯特大学,27,海外名校,The University of Manchester
510
+ 曼彻斯特大学,27,海外名校,UoM
511
+ 京都大学,33,海外名校,Kyoto University
512
+ 香港科技大学,34,海外名校,The Hong Kong University of Science and Technology
513
+ 香港科技大学,34,海外名校,HKUST
514
+ 伦敦大学国王学院,,海外名校,King's College London
515
+ 伦敦大学国王学院,,海外名校,King's
516
+ 伦敦大学国王学院,,海外名校,KCL
517
+ 首尔国立大学,36,海外名校,Seoul National University
518
+ 首尔国立大学,36,海外名校,SNU
519
+ 墨尔本大学,37,海外名校,The University of Melbourne
520
+ 墨尔本大学,37,海外名校,UniMelb
521
+ 墨尔本大学,37,海外名校,Melb Uni
522
+ 悉尼大学,38,海外名校,The University of Sydney
523
+ 悉尼大学,38,海外名校,USYD
524
+ 悉尼大学,38,海外名校,Sydney U
525
+ 香港中文大学,39,海外名校,The Chinese University of Hong Kong
526
+ 香港中文大学,39,海外名校,CUHK
527
+ 韩国高等科技学院,,海外名校,Korea Advanced Institute of Science And Technology
528
+ 韩国高等科技学院,,海外名校,KAIST
529
+ 纽约大学,42,海外名校,New York University
530
+ 纽约大学,42,海外名校,NYU
531
+ 新南威尔士大学,43,海外名校,The University of New South Wales
532
+ 新南威尔士大学,43,海外名校,UNSW
533
+ 巴黎第九大学,,海外名校,University Paris Dauphine
534
+ 英属哥伦比亚大学,,海外名校,University of British Columbia
535
+ 英属哥伦比亚大学,,海外名校,UBC
536
+ 昆士兰大学,47,海外名校,The University of Queensland
537
+ 昆士兰大学,47,海外名校,UQ
538
+ 巴黎理工学院,49,海外名校,Institut Polytechnique de Paris
539
+ 巴黎理工学院,49,海外名校,IP-Paris
540
+ 伦敦政治经济学院,49,海外名校,The London School of Economics and Political Science
541
+ 伦敦政治经济学院,49,海外名校,LSE
542
+ 伦敦政治经济学院,49,海外名校,London School of Economics
543
+ 慕尼黑工业大学,,海外名校,Technical University of Munich
544
+ 杜克大学,52,海外名校,Duke University
545
+ 卡耐基梅隆大学,,海外名校,Carnegie Mellon University
546
+ 慕尼黑工业大学,,海外名校,TU München
547
+ 慕尼黑工业大学,,海外名校,TUM
548
+ 香港城市大学,53,海外名校,City University of Hong Kong
549
+ 香港城市大学,53,海外名校,CityU
550
+ 阿姆斯特丹大学,55,海外名校,University of Amsterdam
551
+ 阿姆斯特丹大学,55,海外名校,UvA
552
+ 东京工业大学,56,海外名校,Tokyo Institute of Technology
553
+ 东京工业大学,56,海外名校,Tokyo Tech
554
+ 代尔夫特理工大学,57,海外名校,Delft University of Technology
555
+ 莫纳什大学,,海外名校,Monash University
556
+ 莫纳什大学,,海外名校,Monash
557
+ 莫纳什大学,,海外名校,蒙纳士
558
+ 莫纳什大学,,海外名校,莫纳什
559
+ 莫纳什大学,,海外名校,莫大
560
+ 布朗大学,60,海外名校,Brown University
561
+ 布朗大学,60,海外名校,Brown
562
+ 布朗大学,60,海外名校,布朗
563
+ 华威大学,61,海外名校,The University of Warwick
564
+ 华威大学,61,海外名校,华威
565
+ 华威大学,61,海外名校,Warwick
566
+ 华威大学,61,海外名校,UoW
567
+ 布里斯托大学,62,海外名校,University of Bristol
568
+ 布里斯托大学,62,海外名校,布大
569
+ 海德堡大学,63,海外名校,Heidelberg University
570
+ 海德堡大学,63,海外名校,海德堡大学
571
+ 慕尼黑大学,64,海外名校,LMU
572
+ 慕尼黑大学,64,海外名校,University of Munich
573
+ 马来亚大学,65,海外名校,University of Malaya
574
+ 马来亚大学,65,海外名校,马大
575
+ 香港理工大学,66,海外名校,The Hong Kong Polytechnic University
576
+ 香港理工大学,66,海外名校,PolyU
577
+ 香港理工大学,66,海外名校,理大
578
+ 香港理工大学,66,海外名校,港理工
579
+ 德克萨斯大学奥斯汀分校,,海外名校,University of Texas at Austin
580
+ 德克萨斯大学奥斯汀分校,,海外名校,得州大学
581
+ 德克萨斯大学奥斯汀分校,,海外名校,UT-Austin
582
+ 国立台湾大学,68,海外名校,National Taiwan University
583
+ 国立台湾大学,68,海外名校,台大
584
+ 布宜诺斯艾利斯大学,69,海外名校,Universidad de Buenos Aires
585
+ 布宜诺斯艾利斯大学,69,海外名校,UBA
586
+ 鲁汶大学(荷语),,海外名校,University of Leuven
587
+ 鲁汶大学(荷语),,海外名校,KU Leuven
588
+ 苏黎世大学,70,海外名校,University of Zurich
589
+ 苏黎世大学,70,海外名校,UZH
590
+ 索邦大学,72,海外名校,Sorbonne University
591
+ 索邦大学,72,海外名校,索邦
592
+ 索邦大学,72,海外名校,Sorbonne
593
+ 格拉斯哥大学,73,海外名校,University of Glasgow
594
+ 格拉斯哥大学,73,海外名校,格大
595
+ 高丽大学,,海外名校,Korea University
596
+ 高丽大学,,海外名校,高丽大
597
+ 大阪大学,75,海外名校,Osaka University
598
+ 大阪大学,75,海外名校,阪大
599
+ 威斯康辛大学麦迪逊分校,,海外名校,University of Wisconsin-Madison
600
+ 威斯康辛大学麦迪逊分校,,海外名校,UW-Madison
601
+ 南安普敦大学,,海外名校,University of Southampton
602
+ 南安普敦大学,,海外名校,Soton
603
+ 莫斯科国立大学,,海外名校,Lomonosov Moscow State University
604
+ 莫斯科国立大学,,海外名校,莫斯科大学
605
+ 哥本哈根大学,79,海外名校,University of Copenhagen
606
+ 哥本哈根大学,79,海外名校,UCPH
607
+ 延世大学,79,海外名校,Yonsei University
608
+ 延世大学,79,海外名校,연세대학교
609
+ 浦项科技大学,81,海外名校,Pohang University of Science And Technology
610
+ 浦项科技大学,81,海外名校,POSTECH
611
+ 杜伦大学,,海外名校,Durham University
612
+ 杜伦大学,,海外名校,Durham
613
+ 伊利诺伊大学厄巴纳-香槟分校,,海外名校,University of Illinois at Urbana-Champaign
614
+ 伊利诺伊大学厄巴纳-香槟分校,,海外名校,UIUC
615
+ 奥克兰大学,85,海外名校,The University of Auckland
616
+ 奥克兰大学,85,海外名校,UoA
617
+ 华盛顿大学,85,海外名校,University of Washington
618
+ 华盛顿大学,85,海外名校,UWashington
619
+ 华盛顿大学,85,海外名校,UW
620
+ 巴黎萨克雷大学,86,海外名校,Université Paris-Saclay
621
+ 巴黎萨克雷大学,86,海外名校,UPSaclay
622
+ 巴黎萨克雷大学,86,海外名校,UPS
623
+ 隆德大学,87,海外名校,Lund University
624
+ 佐治亚理工学院,88,海外名校,Georgia Institute of Technology
625
+ 佐治亚理工学院,88,海外名校,Georgia Tech
626
+ 佐治亚理工学院,88,海外名校,Gatech
627
+ 佐治亚理工学院,88,海外名校,GT
628
+ 瑞典皇家理工学院,89,海外名校,KTH Royal Institute of Technology
629
+ 瑞典皇家理工学院,89,海外名校,皇家理工学院
630
+ 瑞典皇家理工学院,89,海外名校,KTH
631
+ 伯明翰大学,90,海外名校,University of Birmingham
632
+ 伯明翰大学,90,海外名校,伯大
633
+ 伯明翰大学,90,海外名校,UoB
634
+ 圣安德鲁斯大学,91,海外名校,University of St Andrews
635
+ 圣安德鲁斯大学,91,海外名校,St Andrews
636
+ 圣安德鲁斯大学,91,海外名校,St And
637
+ 圣安德鲁斯大学,91,海外名校,圣安
638
+ 利兹大学,92,海外名校,University of Leeds
639
+ 西澳大学,93,海外名校,The University of Western Australia
640
+ 西澳大学,93,海外名校,UWA
641
+ 莱斯大学,94,海外名校,Rice University
642
+ 莱斯大学,94,海外名校,Rice
643
+ 谢菲尔德大学,95,海外名校,The University of Sheffield
644
+ 谢菲尔德大学,95,海外名校,谢菲
645
+ 谢菲尔德大学,95,海外名校,谢大
646
+ 谢菲尔德大学,95,海外名校,TUoS
647
+ 宾州州立大学公园分校,,海外名校,Pennsylvania State University
648
+ 宾州州立大学公园分校,,海外名校,University Park
649
+ 成均馆大学,97,海外名校,Sungkyunkwan University(SKKU)
650
+ 成均馆大学,97,海外名校,成均馆大
651
+ 成均馆大学,97,海外名校,SKKU
652
+ 丹麦技术大学,99,海外名校,Technical University of Denmark
653
+ 丹麦技术大学,99,海外名校,DTU
654
+ 北卡罗来纳大学教堂山分校,100,海外名校,University of North Carolina at Chapel Hill
655
+ 北卡罗来纳大学教堂山分校,100,海外名校,UNC
656
+ 都柏林三一学院,,海外名校,Trinity College Dublin
657
+ 都柏林三一学院,,海外名校,The University of Dublin
658
+ 都柏林三一学院,,海外名校,TCD
659
+ 奥斯陆大学,102,海外名校,University of Oslo
660
+ 奥斯陆大学,102,海外名校,UiO
661
+ 奥斯陆大学,102,海外名校,奥大
662
+ 诺丁汉大学,103,海外名校,University of Nottingham
663
+ 诺丁汉大学,103,海外名校,UoN
664
+ 诺丁汉大学,103,海外名校,诺大
665
+ 赫尔辛基大学,104,海外名校,University of Helsinki
666
+ 赫尔辛基大学,104,海外名校,UH
667
+ 墨西哥国立自治大学,105,海外名校,Universidad Nacional Autónoma de México
668
+ 墨西哥国立自治大学,105,海外名校,墨国大
669
+ 墨西哥国立自治大学,105,海外名校,UNAM
670
+ 日内瓦大学,105,海外名校,University of Geneva
671
+ 日内瓦大学,105,海外名校,UNIGE
672
+ 圣路易斯华盛顿大学,107,海外名校,Washington University in St. Louis
673
+ 圣路易斯华盛顿大学,107,海外名校,WashU
674
+ 圣路易斯华盛顿大学,107,海外名校,WUSTL
675
+ 阿德雷德大学,,海外名校,The University of Adelaide
676
+ 阿德雷德大学,,海外名校,阿大
677
+ 阿卜杜勒阿齐兹国王大学,109,海外名校,King Abdulaziz University (KAU)
678
+ 乌得勒支大学,,海外名校,Utrecht University
679
+ 乌得勒支大学,,海外名校,UU
680
+ 蒙特利尔大学,111,海外名校,Université de Montréal
681
+ 蒙特利尔大学,111,海外名校,UdeM
682
+ 蒙特利尔大学,111,海外名校,蒙大
683
+ 阿尔托大学,112,海外名校,Aalto University
684
+ 阿尔托大学,112,海外名校,Aalto
685
+ 波士顿大学,112,海外名校,Boston University
686
+ 波士顿大学,112,海外名校,BU
687
+ 莱顿大学,112,海外名校,Leiden University
688
+ 南加州大学,11,海外名校,University of Southern California
689
+ 南加州大学,11,海外名校,南加州大学
690
+ 南加州大学,11,海外名校,南加大
691
+ 南加州大学,11,海外名校,USC
692
+ 普渡大学西拉法叶分校,116,海外名校,Purdue University
693
+ 普渡大学西拉法叶分校,116,海外名校,普渡大学
694
+ 伦敦大学玛丽女王学院,,海外名校,Queen Mary University of London
695
+ 伦敦大学玛丽女王学院,,海外名校,QMUL
696
+ 伦敦大学玛丽女王学院,,海外名校,Queen Mary
697
+ 名古屋大学,118,海外名校,Nagoya University
698
+ 名古屋大学,118,海外名校,名大
699
+ 伯尔尼大学,119,海外名校,University of Bern
700
+ 伯尔尼大学,119,海外名校,UniBe
701
+ 俄亥俄州立大学,120,海外名校,The Ohio State University
702
+ 俄亥俄州立大学,120,海外名校,Ohio State
703
+ 俄亥俄州立大学,120,海外名校,OSU
704
+ 查尔姆斯理工大学,121,海外名校,Chalmers University of Technology
705
+ 查尔姆斯理工大学,121,海外名校,Chalmers
706
+ 圣保罗大学,121,海外名校,Universidade de São Paulo
707
+ 圣保罗大学,121,海外名校,USP
708
+ 圣保罗大学,121,海外名校,圣大
709
+ 瓦格宁根大学,,海外名校,Wageningen University & Research
710
+ 瓦格宁根大学,,海外名校,Wageningen UR
711
+ 乌普萨拉大学,124,海外名校,Uppsala University
712
+ 乌普萨拉大学,124,海外名校,乌大
713
+ 埃因霍温理工大学,,海外名校,Eindhoven University of Technology
714
+ 埃因霍温理工大学,,海外名校,TU/e
715
+ 埃因霍温理工大学,,海外名校,TU Eindhoven
716
+ 柏林自由大学,127,海外名校,Freie Universitaet Berlin
717
+ 柏林自由大学,127,海外名校,FU Berlin
718
+ 柏林洪堡大学,128,海外名校,Humboldt-Universität zu Berlin
719
+ 柏林洪堡大学,128,海外名校,洪堡大学
720
+ 柏林洪堡大学,128,海外名校,柏林大学
721
+ 格罗宁根大学,128,海外名校,University of Groningen
722
+ 格罗宁根大学,128,海外名校,RuG
723
+ 里昂高等师范学院,,海外名校,École Normale Supérieure de Lyon
724
+ 里昂高等师范学院,,海外名校,ENS Lyon
725
+ 兰卡斯特大学,,海外名校,Lancaster University
726
+ 兰卡斯特大学,,海外名校,兰卡
727
+ 悉尼科技大学,133,海外名校,University of Technology Sydney
728
+ 悉尼科技大学,133,海外名校,UTS
729
+ 悉尼科技大学,133,海外名校,悉尼科大
730
+ 纽卡斯尔大学,134,海外名校,Newcastle University
731
+ 纽卡斯尔大学,134,海外名校,NCL
732
+ 纽卡斯尔大学,134,海外名校,纽大
733
+ 智利天主大学,,海外名校,Pontificia Universidad Católica de Chile (UC)
734
+ 卡尔斯鲁厄理工学院,136,海外名校,KIT
735
+ 卡尔斯鲁厄理工学院,136,海外名校,Karlsruhe Institute of Technology
736
+ 九州大学,137,海外名校,Kyushu University
737
+ 九州大学,137,海外名校,九大
738
+ 巴塞尔大学,138,海外名校,University of Basel
739
+ 巴塞尔大学,138,海外名校,Unibasel
740
+ 麦克马斯特大学,140,海外名校,McMaster University
741
+ 麦克马斯特大学,140,海外名校,麦马
742
+ 根特大学,141,海外名校,Ghent University
743
+ 根特大学,141,海外名校,UGent
744
+ 米兰理工大学,142,海外名校,Politecnico di Milano
745
+ 米兰理工大学,142,海外名校,POLIMI
746
+ 米兰理工大学,142,海外名校,米理
747
+ 米兰理工大学,142,海外名校,米兰理工
748
+ 马来西亚博特拉大学,143,海外名校,Universiti Putra Malaysia (UPM)
749
+ 马来西亚博特拉大学,143,海外名校,UPM
750
+ 马来西亚国民大学,144,海外名校,Universiti Kebangsaan Malaysia (UKM)
751
+ 马来西亚国民大学,144,海外名校,UKM
752
+ 北海道大学,145,海外名校,Hokkaido University
753
+ 北海道大学,145,海外名校,北大(ほくだい)
754
+ 马来西亚理科大学,147,海外名校,Universiti Sains Malaysia (USM)
755
+ 马来西亚理科大学,147,海外名校,USM
756
+ 马来西亚理科大学,147,海外名校,槟城理大
757
+ 斯德哥尔摩大学,148,海外名校,Stockholm University
758
+ 斯德哥尔摩大学,148,海外名校,SU
759
+ 埃克塞特大学,149,海外名校,The University of Exeter
760
+ 埃克塞特大学,149,海外名校,Exon
761
+ 滑铁卢大学,149,海外名校,University of Waterloo
762
+ 滑铁卢大学,149,海外名校,Waterloo(UW)
763
+ 卡迪夫大学,151,海外名校,Cardiff University
764
+ 卡迪夫大学,151,海外名校,卡大
765
+ 维也纳大学,151,海外名校,University of Vienna
766
+ 维也纳大学,151,海外名校,univie
767
+ 约克大学(英国),,海外名校,University of York
768
+ 约克大学(英国),,海外名校,York
769
+ 罗切斯特大学,,海外名校,University of Rochester
770
+ 罗切斯特大学,,海外名校,U of R
771
+ 奥胡斯大学,155,海外名校,Aarhus University
772
+ 奥胡斯大学,155,海外名校,AU
773
+ 汉阳大学,156,海外名校,Hanyang University
774
+ 汉阳大学,156,海外名校,汉阳大
775
+ 密歇根州立大学,157,海外名校,Michigan State University
776
+ 密歇根州立大学,157,海外名校,MSU
777
+ 马里兰大学学院公园分校,,海外名校,University of Maryland
778
+ 马里兰大学学院公园分校,,海外名校,College Park
779
+ 马里兰大学学院公园分校,,海外名校,UMD
780
+ 马里兰大学学院公园分校,,海外名校,UMCP
781
+ 柏林工业大学,159,海外名校,Technische Universität Berlin (TU Berlin)
782
+ 柏林工业大学,159,海外名校,TUB
783
+ 柏林工业大学,159,海外名校,TU Berlin
784
+ 埃默里大学,160,海外名校,Emory University
785
+ 埃默里大学,160,海外名校,Emory
786
+ 凯斯西储大学,161,海外名校,Case Western Reserve University
787
+ 凯斯西储大学,161,海外名校,CWRU
788
+ 凯斯西储大学,161,海外名校,CASE
789
+ 蒙特雷理工学院,,海外名校,Tecnológico de Monterrey
790
+ 法赫德国王石油与矿产大学,,海外名校,King Fahd University of Petroleum & Minerals
791
+ 法赫德国王石油与矿产大学,,海外名校,KFUPM
792
+ 匹兹堡大学,163,海外名校,University of Pittsburgh
793
+ 匹兹堡大学,163,海外名校,PITT
794
+ 匹兹堡大学,163,海外名校,匹大
795
+ 亚琛工业大学,165,海外名校,RWTH Aachen University
796
+ 亚琛工业大学,165,海外名校,RWTH Aachen
797
+ 亚琛工业大学,165,海外名校,RWTH
798
+ 博洛尼亚大学,166,海外名校,Alma Mater Studiorum - University of Bologna
799
+ 博洛尼亚大学,166,海外名校,Unibo
800
+ 博洛尼亚大学,166,海外名校,博大
801
+ 巴斯大学,166,海外名校,University of Bath
802
+ 德州农工大学,168,海外名校,Texas A&M University
803
+ 德州农工大学,168,海外名校,TAMU
804
+ 巴塞罗那大学,168,海外名校,Universitat de Barcelona
805
+ 西安大略大学,,海外名校,Western University
806
+ 西安大略大学,,海外名校,韦仕敦大学
807
+ 西安大略大学,,海外名校,UWO
808
+ 罗马第一大学,,海外名校,Sapienza University of Rome
809
+ 罗马第一大学,,海外名校,罗马一大
810
+ 弗莱堡大学,,海外名校,Albert-Ludwigs-Universitaet Freiburg
811
+ 都柏林大学学院,,海外名校,University College Dublin
812
+ 都柏林大学学院,,海外名校,UCD
813
+ 都柏林大学学院,,海外名校,UC Dublin
814
+ 佛罗里达大学,173,海外名校,University of Florida
815
+ 佛罗里达大学,173,海外名校,UF
816
+ 佛罗里达大学,173,海外名校,UFL
817
+ 国立哈萨克大学,,海外名校,Al-Farabi Kazakh National University
818
+ 洛桑大学,176,海外名校,University of Lausanne
819
+ 蒂宾根大学,177,海外名校,Eberhard Karls Universität Tübingen
820
+ 印度理工学院孟买分校,,海外名校,Indian Institute of Technology Bombay
821
+ 印度理工学院孟买分校,,海外名校,IITB
822
+ 伊拉斯姆斯大学,,海外名校,Erasmus University Rotterdam
823
+ 国立清华大学,180,海外名校,National Tsing Hua University
824
+ 维也纳技术大学,,海外名校,Technische Universität Wien
825
+ 哥德堡大学,180,海外名校,University of Gothenburg
826
+ 哈里发大学,,海外名校,Khalifa University
827
+ 智利大学,183,海外名校,Universidad de Chile
828
+ 印度理工学院德里分校,,海外名校,Indian Institute of Technology Delhi
829
+ 印度理工学院德里分校,,海外名校,IITD
830
+ 印度科学学院,,海外名校,Indian Institute of Science
831
+ 明尼苏达大学双城分校,186,海外名校,University of Minnesota Twin Cities
832
+ 鲁汶大学(法语),,海外名校,Université catholique de Louvain
833
+ 鲁汶大学(法语),,海外名校,UCLouvain
834
+ 利物浦大学,189,海外名校,University of Liverpool
835
+ 特文特大学,,海外名校,University of Twente
836
+ 达特茅斯学院,191,海外名校,Dartmouth College
837
+ 马来西亚理工大学,,海外名校,Universiti Teknologi Malaysia
838
+ 卧龙岗大学,193,海外名校,University of Wollongong
839
+ 科廷大学,194,海外名校,Curtin University
840
+ 德累斯顿工业大学,194,海外名校,Technische Universität Dresden
841
+ 奥塔戈大学,,海外名校,University of Otago
842
+ 纽卡斯尔大学(澳洲),,海外名校,The University of Newcastle
843
+ 纽卡斯尔大学(澳洲),,海外名校,Australia
844
+ 纽卡斯尔大学(澳洲),,海外名校,UON
845
+ 耶路撒冷希伯来大学,198,海外名校,The Hebrew University of Jerusalem
846
+ 卑尔根大学,199,海外名校,University of Bergen
847
+ 麦考瑞大学,200,海外名校,Macquarie University
848
+ 加州大学伯克利分校,32,海外名校,"University of California, Berkeley"
849
+ 加州大学伯克利分校,32,海外名校,UC Berkeley
850
+ 加州大学伯克利分校,32,海外名校,Cal
851
+ 加州大学圣地亚哥分校,48,海外名校,"University of California, San Diego"
852
+ 加州大学圣地亚哥分校,48,海外名校,UCSD
853
+ 加州大学圣地亚哥分校,48,海外名校,UC San Diego
854
+ 加州大学戴维斯分校,138,海外名校,"University of California, Davis"
855
+ 加州大学戴维斯分校,138,海外名校,UC Davis
856
+ 加州大学圣塔芭芭拉分校,146,海外名校,"University of California, Santa Barbara"
857
+ 加州大学圣塔芭芭拉分校,146,海外名校,UCSB
858
+ 加州大学圣塔芭芭拉分校,146,海外名校,UC Santa Barbara
859
+ 上海体育学院,,双一流,SUS
860
+ 密歇根大学-安娜堡,23,海外名校,University of Michigan - Ann Arbor
861
+ 伦敦国王学院,35,海外名校,King’s College London
862
+ 加州大学洛杉矶分校,40,海外名校,"University of California, Los Angeles"
863
+ 韩国科学技术院,41,海外名校,KAIST - Korea Advanced Institute of Science & Technology
864
+ 巴黎文理研究大学,44,海外名校,PSL University
865
+ 德克萨斯州大学奥斯汀分校,67,海外名校,The University of Texas at Austin
866
+ 威斯康星大学-麦迪逊,75,海外名校,University of Wisconsin - Madison
867
+ 宾夕法尼亚州立大学,96,海外名校,Penn State (Main campus)
868
+ 里昂高等师范学校,130,海外名校,Ecole Normale Superieure - Lyon
869
+ 智利天主教大学,135,海外名校,Pontifical Catholic University of Chile
870
+ 约克大学,494,海外名校,York University
871
+ 马里兰大学帕克分校,158,海外名校,"University of Maryland, College Park"
872
+ 蒙特雷技术学院,161,海外名校,Tecnologico de Monterrey
873
+ 天主教鲁汶大学(法语区),188,海外名校,Université Catholique de Louvain
874
+ 澳大利亚纽卡斯尔大学,197,海外名校,"The University of Newcastle, Australia"
875
+ 庆应义塾大学,201,海外名校,Keio University
876
+ 雷丁大学,202,海外名校,University of Reading
877
+ 早稻田大学,203,海外名校,Waseda University
878
+ 哥廷根大学,204,海外名校,University of Göttingen
879
+ 阿伯丁大学,205,海外名校,University of Aberdeen
880
+ 墨尔本皇家理工大学,206,海外名校,RMIT University
881
+ 马德里自治大学,207,海外名校,Autonomous University of Madrid
882
+ 布鲁塞尔自由大学,207,海外名校,Universite libre de Bruxelles
883
+ 阿姆斯特丹自由大学,209,海外名校,Vrije Universiteit Amsterdam
884
+ 巴塞罗那自治大学,209,海外名校,Autonomous University of Barcelona
885
+ 蔚山国立科学技术研究所,212,海外名校,Ulsan National Institute of Science & Technology
886
+ 昆士兰科技大学,213,海外名校,Queensland University of Technology
887
+ 汉堡大学,214,海外名校,Universität Hamburg
888
+ 朱拉隆功大学,215,海外名校,Chulalongkorn University
889
+ 亚利桑那州立大学,216,海外名校,Arizona State University
890
+ 贝尔法斯特女王大学,216,海外名校,Queen’s University Belfast
891
+ 范德堡大学,218,海外名校,Vanderbilt University
892
+ 坎皮纳斯州立大学,219,海外名校,State University of Campinas
893
+ 拉德堡德大学,220,海外名校,Radboud University
894
+ 布鲁塞尔大学,221,海外名校,Vrije Universiteit Brussel
895
+ 圣母大学,701,海外名校,Notre Dame University-Louaize NDU
896
+ 马德里康普顿斯大学,223,海外名校,Complutense University of Madrid
897
+ 卡塔尔大学,224,海外名校,Qatar University
898
+ 波恩大学,226,海外名校,Rheinische Friedrich-Wilhelms-Universität Bonn
899
+ 开普敦大学,226,海外名校,University of Cape Town
900
+ 萨塞克斯大学,226,海外名校,University of Sussex
901
+ 弗吉尼亚大学,226,海外名校,University of Virginia
902
+ 渥太华大学,230,海外名校,University of Ottawa
903
+ 拉夫堡大学,231,海外名校,Loughborough University
904
+ 加州大学欧文分校,232,海外名校,"University of California, Irvine"
905
+ 安特卫普大学,233,海外名校,University of Antwerp
906
+ 马斯特里赫特大学,233,海外名校,Maastricht University
907
+ 卡尔加里大学,235,海外名校,University of Calgary
908
+ 安德斯大学,236,海外名校,Universidad de los Andes
909
+ 莱斯特大学,236,海外名校,University of Leicester
910
+ 维多利亚大学,601,海外名校,Victoria University
911
+ 金斯敦女王大学,240,海外名校,Queen's University at Kingston
912
+ 瑞士提契诺大学,240,海外名校,USI - Università della Svizzera italiana
913
+ 贝鲁特美国大学,242,海外名校,American University of Beirut
914
+ 帕多瓦大学,242,海外名校,Università di Padova
915
+ 圣彼得堡国立大学,242,海外名校,Saint Petersburg State University
916
+ 巴黎高科桥梁学院,245,海外名校,Ecole des Ponts ParisTech
917
+ 新西伯利亚州立大学,246,海外名校,Novosibirsk State University
918
+ 马萨诸塞大学安姆斯特分校,246,海外名校,University of Massachusetts
919
+ 乔治敦大学,248,海外名校,Georgetown University
920
+ 庞培法布拉大学,248,海外名校,Pompeu Fabra University
921
+ 文莱达鲁萨兰国大学,250,海外名校,Universiti Brunei Darussalam (UBD)
922
+ 科罗拉多大学-玻尔得,251,海外名校,University of Colorado at Boulder
923
+ 国立成功大学,252,海外名校,National Cheng Kung University
924
+ 叶史瓦大学,252,海外名校,Yeshiva University
925
+ 加查马达大学,254,海外名校,Gadjah Mada University
926
+ 印度马德拉斯技术学院,255,海外名校,Indian Institute of Technology Madras
927
+ 玛希隆大学,255,海外名校,Mahidol University
928
+ 特拉维夫大学,255,海外名校,Tel Aviv University
929
+ 哥伦比亚国立大学,258,海外名校,National University of Colombia
930
+ 爱尔兰国立大学戈尔韦分校,258,海外名校,National University of Ireland Galway
931
+ 坎特伯雷大学,258,海外名校,University of Canterbury
932
+ 科学宝,261,海外名校,Sciences Po
933
+ 巴黎大学,261,海外名校,Université de Paris
934
+ 庆熙大学,264,海外名校,Kyung Hee University
935
+ 罗格斯大学,264,海外名校,Rutgers University–New Brunswick
936
+ 纳瓦拉大学,266,海外名校,University of Navarra
937
+ 布拉格查理大学,266,海外名校,Charles University
938
+ 国立阳明大学,268,海外名校,National Yang Ming Chiao Tung University
939
+ 亚利桑那大学,268,海外名校,The University of Arizona
940
+ 达姆施塔特工业大学,269,海外名校,Technical University Darmstadt
941
+ 赫瑞瓦特大学,270,海外名校,Heriot Watt University
942
+ 达尔豪斯大学,272,海外名校,Dalhousie University
943
+ 托木斯克州立大学,272,海外名校,Tomsk State University
944
+ 萨里大学,272,海外名校,University of Surrey
945
+ 南方科技大学,275,海外名校,Southern University of Science & Technology
946
+ 塔夫茨大学,275,海外名校,Tufts University
947
+ 格拉茨科技大学,277,海外名校,Graz University of Technology
948
+ 印度理工学院坎普尔,277,海外名校,Indian Institute of Technology Kanpur
949
+ 沙特国王大学,277,海外名校,King Saud University
950
+ 印度理工学院卡拉格普尔分校,280,海外名校,Indian Institute of Technology Kharagpur
951
+ 鲍曼莫斯科国立技术大学,281,海外名校,Bauman Moscow State Technical University
952
+ 因斯布鲁克大学,281,海外名校,University of Innsbruck
953
+ 迪肯大学,283,海外名校,Deakin University
954
+ 梅西大学,284,海外名校,Massey University
955
+ 伊利诺伊大学芝加哥分校,285,海外名校,University of Illinois at Chicago
956
+ 筑波大学,285,海外名校,University of Tsukuba
957
+ 香港浸会大学,287,海外名校,Hong Kong Baptist University
958
+ 阿联酋大学,288,海外名校,United Arab Emirates University
959
+ 印度尼西亚大学,290,海外名校,University of Indonesia
960
+ 格里菲斯大学,290,海外名校,Griffith University
961
+ 莫斯科物理科学与技术学院,290,海外名校,Moscow Institute of Physics and Technology
962
+ 巴黎第一大学,290,海外名校,Panthéon-Sorbonne University – Paris 1
963
+ 白俄罗斯国立大学,295,海外名校,Belarusian State University
964
+ 波尔图大学,295,海外名校,University of Porto
965
+ 图尔库大学,295,海外名校,University of Turku
966
+ 科克大学,298,海外名校,University College Cork
967
+ 西蒙弗雷泽大学,298,海外名校,Simon Fraser University
968
+ 北卡罗来纳州立大学,300,海外名校,North Carolina State University
969
+ 塔尔图大学,300,海外名校,University of Tartu
970
+ 斯特拉斯克莱德大学,302,海外名校,University of Strathclyde
971
+ 塔斯马尼亚大学,303,海外名校,University of Tasmania
972
+ 万隆理工学院(ITB),303,海外名校,Bandung Institute of Technology (ITB)
973
+ 光州科学技术学院,305,海外名校,Gwangju Institute of Science and Technology
974
+ 俄罗斯国立高等经济大学,305,海外名校,HSE University
975
+ 东英吉利大学,307,海外名校,University of East Anglia
976
+ 华沙大学,308,海外名校,University of Warsaw
977
+ 南丹麦大学,309,海外名校,University of Southern Denmark
978
+ 雅盖隆大学,309,海外名校,Jagiellonian University
979
+ 印第安纳大学伯明顿分校,311,海外名校,Indiana University
980
+ 科隆大学,311,海外名校,University of Cologne
981
+ 迈阿密大学,311,海外名校,University of Miami
982
+ 格勒诺布尔阿尔卑斯大学,314,海外名校,Grenoble Alpes University
983
+ 国立台湾科技大学,314,海外名校,National Taiwan University of Science and Technology
984
+ 米兰大学,316,海外名校,University of Milan
985
+ 俄罗斯人民友谊大学,317,海外名校,RUDN University
986
+ IE大学,317,海外名校,IE University
987
+ 国立核研究大学-莫斯科工程物理学院,319,海外名校,National Research Nuclear University MEPhI
988
+ 加泰罗尼亚理工大学,319,海外名校,Polytechnic University of Catalonia
989
+ 斯威本科技大学,321,海外名校,Swinburne University of Technology
990
+ 阿根廷天主教大学,322,海外名校,Pontificia Universidad Católica Argentina
991
+ 邓迪大学,322,海外名校,University of Dundee
992
+ 澳门大学,322,海外名校,University of Macau
993
+ 埃尔朗根-纽伦堡大学,322,海外名校,Friedrich-Alexander-Universität Erlangen-Nürnberg
994
+ 奥尔堡大学,326,海外名校,Aalborg University
995
+ 南澳大利亚大学,326,海外名校,University of South Australia
996
+ 国立欧亚大学,328,海外名校,L.N. Gumilyov Eurasian National University
997
+ 林雪平大学,329,海外名校,Linkoping University
998
+ 以色列理工学院,330,海外名校,Technion - Israel Institute of Technology
999
+ 伦敦大学城市学院,330,海外名校,City University London
1000
+ 伦敦大学伯贝克学院,332,海外名校,Birkbeck University London
1001
+ 圣光机大学,365,海外名校,ITMO University
1002
+ 国立台湾师范大学,334,海外名校,National Taiwan Normal University
1003
+ 都灵理工大学,334,海外名校,Politecnico di Torino
1004
+ 伦敦大学皇家霍洛威学院,334,海外名校,Royal Holloway University of London
1005
+ 法兰克福大学,340,海外名校,Goethe University Frankfurt
1006
+ 夏威夷大学马诺阿分校,340,海外名校,University of Hawai’i at Mānoa
1007
+ 广岛大学,343,海外名校,Hiroshima University
1008
+ 耶拿大学,344,海外名校,Universität Jena
1009
+ 文莱大学,344,海外名校,Universiti Teknologi Brunei
1010
+ 弗吉尼亚理工,346,海外名校,Virginia Polytechnic Institute and State University
1011
+ 斯图加特大学,347,海外名校,Universität Stuttgart
1012
+ 加州大学圣克鲁兹分校,347,海外名校,"University of California, Santa Cruz"
1013
+ 喀山联邦大学,347,海外名校,Kazan Federal University
1014
+ 斯特雅大学,347,海外名校,UCSI University
1015
+ 马德里卡洛斯三世大学,351,海外名校,Universidad Carlos III de Madrid
1016
+ 乌拉尔联邦大学,351,海外名校,Ural Federal University
1017
+ 伦敦布鲁内尔大学,351,海外名校,Brunel University London
1018
+ 约翰内斯开普勒大学林茨,354,海外名校,Johannes Kepler University Linz
1019
+ 乔治华盛顿大学,355,海外名校,George Washington University
1020
+ 里斯本大学,356,海外名校,University of Lisbon
1021
+ 蒂尔堡大学,356,海外名校,Tilburg University
1022
+ 国立科技大学-巴基斯坦,358,海外名校,National University of Sciences & Technology - Pakistan
1023
+ 于韦斯屈莱大学,358,海外名校,University of Jyvaskyla
1024
+ 犹他大学,358,海外名校,University of Utah
1025
+ 拉筹伯大学,362,海外名校,La Trobe University
1026
+ 莫斯科国立国际关系学院,362,海外名校,MGIMO University
1027
+ 梨花女子大学,362,海外名校,Ewha Womans University
1028
+ 乌尔姆大学,365,海外名校,Ulm University
1029
+ 于默奥大学,365,海外名校,Umea University
1030
+ 苏丹卡布斯大学,368,海外名校,Sultan Qaboos University
1031
+ 挪威科技大学,369,海外名校,Norwegian University of Science and Technology
1032
+ 里约热内卢联邦大学,369,海外名校,Federal University of Rio de Janeiro
1033
+ 瓦伦西亚理工大学,371,海外名校,Polytechnic University of Valencia
1034
+ 林肯大学,801,海外名校,University of Lincoln
1035
+ 布拉格化工大学,373,海外名校,"University of Chemistry & Technology, Prague"
1036
+ 康涅狄格大学,373,海外名校,University of Connecticut
1037
+ 怀卡托大学,373,海外名校,University of Waikato
1038
+ 奥卢大学,377,海外名校,University of Oulu
1039
+ 维尔茨堡大学,378,海外名校,Julius-Maximilians-Universität Würzburg
1040
+ 真纳大学,378,海外名校,Quaid I Azam University
1041
+ 纽约州立大学石溪分校,378,海外名校,Stony Brook University
1042
+ 谢里夫科技大学,381,海外名校,Sharif University of Technology
1043
+ 东京医科齿科大学,381,海外名校,Tokyo Medical and Dental University
1044
+ 美国沙迦大学,383,海外名校,American University of Sharjah
1045
+ 肯特大学,383,海外名校,University of Kent
1046
+ 神户大学,386,海外名校,Kobe University
1047
+ 堪萨斯大学,387,海外名校,University of Kansas
1048
+ 纽约州立大学布法罗分校,388,海外名校,University at Buffalo
1049
+ 比萨大学,388,海外名校,University of Pisa
1050
+ 圣拉斐尔生命健康大学,390,海外名校,Università Vita-Salute San Raffaele
1051
+ 伦敦大学亚非学院,391,海外名校,SOAS University of London
1052
+ 巴勒莫大学,801,海外名校,University of Palermo
1053
+ 彼得大帝圣彼得堡理工大学,393,海外名校,Peter the Great St Petersburg Polytechnic University
1054
+ 波鸿大学,393,海外名校,University of Bochum
1055
+ 国立研究托木斯克理工大学,395,海外名校,National Research Tomsk Polytechnic University
1056
+ 秘鲁天主教大学,395,海外名校,Pontifical Catholic University of Peru
1057
+ 印度理工学院古瓦哈提,395,海外名校,Indian Institute of Technology Guwahati
1058
+ 巴基斯坦工程与应用科学研究所,398,海外名校,Pakistan Institute of Engineering and Applied Sciences (PIEAS)
1059
+ 菲律宾大学,399,海外名校,University of the Philippines
1060
+ 南方大学,400,海外名校,Universidad Austral
1061
+ 印度理工学院鲁尔基校区,400,海外名校,Indian Institute of Technology Roorkee
1062
+ 维尔纽斯大学,400,海外名校,Vilnius University
1063
+ 加州大学河滨分校,403,海外名校,"University of California, Riverside"
1064
+ 布拉格捷克技术大学,403,海外名校,Czech Technical University in Prague
1065
+ 弗林德斯大学,407,海外名校,Flinders University
1066
+ 台北医科大学,407,海外名校,Taipei Medical University
1067
+ 明斯特大学,411,海外名校,University of Muenster
1068
+ 国立中山大学,412,海外名校,National Sun Yat-Sen University
1069
+ 哈韦里亚纳大学,412,海外名校,Pontificia Universidad Javeriana
1070
+ 拉彭兰塔理工大学,414,海外名校,Lappeenranta University of Technology
1071
+ 拉瓦尔大学,414,海外名校,Université Laval
1072
+ 国油大学,414,海外名校,Universiti Teknologi Petronas
1073
+ 科罗拉多大学-丹佛,414,海外名校,University of Colorado at Denver
1074
+ 坦佩雷大学,414,海外名校,University of Tampere
1075
+ 中央大学,414,海外名校,Chung-Ang University
1076
+ 斯特拉斯堡大学,421,海外名校,Université de Strasbourg
1077
+ 雅典国立技术大学,421,海外名校,National Technical University of Athens
1078
+ 曼海姆大学,423,海外名校,Universität Mannheim
1079
+ 那不勒斯菲里德里克第二大学,424,海外名校,University of Naples - Federico II
1080
+ 威特沃特斯兰德大学,424,海外名校,University of the Witwatersrand
1081
+ 詹姆斯库克大学,424,海外名校,James Cook University
1082
+ 约翰内斯古腾堡美因茨大学,427,海外名校,Johannes Gutenberg University of Mainz
1083
+ 华盛顿州立大学,427,海外名校,Washington State University
1084
+ 维克森林大学,429,海外名校,Wake Forest University
1085
+ 牛津布鲁克斯大学,429,海外名校,Oxford Brookes University
1086
+ 新里斯本大学,431,海外名校,NOVA University Lisbon
1087
+ 伦斯勒理工学院,431,海外名校,Rensselaer Polytechnic Institute
1088
+ 科罗拉多州立大学,431,海外名校,Colorado State University
1089
+ 圣保罗联邦大学,434,海外名校,Federal University of Sao Paulo
1090
+ 约翰内斯堡大学,434,海外名校,University of Johannesburg
1091
+ 堪培拉大学,436,海外名校,University of Canberra
1092
+ 杜兰大学,436,海外名校,Tulane University
1093
+ 埃塞克斯大学,439,海外名校,University of Essex
1094
+ 塞浦路斯大学,440,海外名校,University of Cyprus
1095
+ 特伦托大学,440,海外名校,University of Trento
1096
+ 特罗姆瑟大学挪威北极大学,440,海外名校,University of Tromsø The Arctic University of Norway
1097
+ 斯旺西大学,440,海外名校,Swansea University
1098
+ 伊利诺伊理工学院,444,海外名校,Illinois Institute of Technology
1099
+ 韩国外国语大学,445,海外名校,HUFS - Hankuk (Korea) University of Foreign Studies
1100
+ 开罗美国大学,445,海外名校,The American University in Cairo
1101
+ 萨尔大学,447,海外名校,Saarland University
1102
+ 乌姆古拉大学,447,海外名校,Umm Al-Qura University
1103
+ 莱比锡大学,447,海外名校,Universität Leipzig
1104
+ 米兰 - 比可卡大学,450,海外名校,University of Milan-Bicocca
1105
+ 佛罗伦萨大学,451,海外名校,University of Florence
1106
+ 奥克兰理工大学,451,海外名校,Auckland University of Technology
1107
+ 邦德大学,451,海外名校,Bond University
1108
+ 科英布拉大学,455,海外名校,University of Coimbra
1109
+ 爱荷华大学,455,海外名校,University of Iowa
1110
+ 布兰迪斯大学,455,海外名校,Brandeis University
1111
+ 萨斯喀彻温大学,458,海外名校,University of Saskatchewan
1112
+ 马德里理工大学,459,海外名校,Universidad Politécnica de Madrid
1113
+ 圣加仑大学,459,海外名校,University of St Gallen
1114
+ 马丁·路德·哈勒-威登堡大学,461,海外名校,Martin Luther University of Halle-Wittenberg
1115
+ 科罗拉多矿业大学,461,海外名校,Colorado School of Mines
1116
+ 远东联邦大学,461,海外名校,Far Eastern Federal University
1117
+ 伦敦大学金史密斯学院,461,海外名校,Goldsmiths University London
1118
+ 爱兰加大学,465,海外名校,Airlangga University
1119
+ 阿米尔卡比尔理工大学,465,海外名校,Amirkabir University of Technology
1120
+ 贝尔格拉诺大学,465,海外名校,Universidad de Belgrano
1121
+ 国立台北科技大学,469,海外名校,National Taipei University of Technology
1122
+ 乌拉圭蒙得维的亚大学,469,海外名校,Universidad de Montevideo (UM)
1123
+ 乌拉圭大学,471,海外名校,Universidad ORT Uruguay
1124
+ 斯特灵大学,471,海外名校,University of Stirling
1125
+ 内盖夫本古里安大学,471,海外名校,Ben-Gurion University of the Negev
1126
+ 哈瑟尔特大学,471,海外名校,Hasselt University
1127
+ 佛罗里达州立大学,475,海外名校,Florida State University
1128
+ 密苏里大学,476,海外名校,Mizzou - University of Missouri
1129
+ 德克萨斯大学-达拉斯,477,海外名校,The University of Texas at Dallas
1130
+ 巴伊兰大学,477,海外名校,Bar-Ilan University
1131
+ 千叶大学,477,海外名校,Chiba University
1132
+ 长庚大学,480,海外名校,Chang Gung University
1133
+ 列日大学,480,海外名校,University of Liege
1134
+ 南哈萨克斯坦大学,482,海外名校,Auezov South Kazakhstan University (SKU)
1135
+ 斯泰伦博斯大学,482,海外名校,Stellenbosch University
1136
+ 韩国天主教大学,482,海外名校,The Catholic University of Korea
1137
+ 都灵大学,485,海外名校,University of Turin
1138
+ 阿斯顿大学,485,海外名校,Aston University
1139
+ 俄罗斯国立科技大学,487,海外名校,The National University of Science and Technology MISIS
1140
+ 横滨市立大学,487,海外名校,Yokohama City University
1141
+ 智利圣地亚哥大学,487,海外名校,Universidad de Santiago de Chile
1142
+ 都柏林城市大学,490,海外名校,Dublin City University
1143
+ 贾森·利比希大学,490,海外名校,Justus Liebig University Giessen
1144
+ 圣保罗州立大学,492,海外名校,UNESP
1145
+ 格拉纳达大学,492,海外名校,University of Granada
1146
+ 罗马第二大学,494,海外名校,University of Roma - Tor Vergata
1147
+ 西悉尼大学,494,海外名校,University of Western Sydney
1148
+ 波士顿学院,494,海外名校,Boston College
1149
+ 基尔大学,751,海外名校,Keele University
1150
+ 东国大学,494,海外名校,Dongguk University
1151
+ 爱荷华州立大学,494,海外名校,Iowa State University
1152
+ 西江大学,494,海外名校,Sogang University
1153
+ 华沙工业大学,501,海外名校,Warsaw University of Technology
1154
+ 艾克斯 - 马赛大学,501,海外名校,Aix-Marseille University
1155
+ 哈瓦那大学,501,海外名校,Universidad de La Habana
1156
+ 康斯坦茨大学,501,海外名校,Universität Konstanz
1157
+ 波尔多大学,501,海外名校,Universite de Bordeaux
1158
+ 德里大学,501,海外名校,University of Delhi
1159
+ 克拉根福大学,501,海外名校,University of Klagenfurt
1160
+ 利默里克大学,501,海外名校,University of Limerick
1161
+ 萨特巴耶夫大学,501,海外名校,Satbayev University
1162
+ 密苏里科技大学,511,海外名校,Missouri University of Science & Technology
1163
+ 新加坡管理大学,511,海外名校,Singapore Management University
1164
+ 茂物农业大学,511,海外名校,IPB University
1165
+ 科奇大学,511,海外名校,Koç University
1166
+ 哈尔科夫大学,511,海外名校,V. N. Karazin Kharkiv National University
1167
+ 韦恩州立大学,511,海外名校,Wayne State University
1168
+ 阿尔卡拉大学,511,海外名校,Universidad de Alcala
1169
+ 马来西亚北方大学,511,海外名校,Universiti Utara Malaysia
1170
+ 巴拉曼德大学,511,海外名校,University of Balamand
1171
+ 中央大学“阿伯勒玛塔”德拉斯别墅,521,海外名校,"Universidad Central ""Marta Abreu"" de Las Villas"
1172
+ 拜罗伊特大学,521,海外名校,University of Bayreuth
1173
+ 东芬兰大学,521,海外名校,University of Eastern Finland
1174
+ 德黑兰大学,521,海外名校,University of Tehran
1175
+ 康考迪亚大学,521,海外名校,Concordia University
1176
+ 阿卜杜勒拉曼费萨尔大学,521,海外名校,Imam Abdulrahman Bin Faisal University
1177
+ 蒙彼利埃大学,521,海外名校,Montpellier University
1178
+ 国立中央大学,521,海外名校,National Central University
1179
+ 理海大学,531,海外名校,Lehigh University
1180
+ 长崎大学,531,海外名校,Nagasaki University
1181
+ 国家研究萨拉托夫州立大学,531,海外名校,National Research Saratov State University
1182
+ 俄勒冈州立大学,531,海外名校,Oregon State University
1183
+ 贝鲁特圣约瑟夫大学,531,海外名校,Saint Joseph University of Beirut (USJ)
1184
+ 南方联邦大学,531,海外名校,Southern Federal University
1185
+ 韩国亚洲大学,531,海外名校,Ajou University
1186
+ 一桥大学,531,海外名校,Hitotsubashi University
1187
+ 里昂国家科学研究院,531,海外名校,Institut National des Sciences Appliquées de Lyon (INSA)
1188
+ 哥斯达黎加大学,531,海外名校,Universidad de Costa Rica
1189
+ 萨拉戈萨大学,531,海外名校,Universidad de Zaragoza
1190
+ 特拉华大学,531,海外名校,University of Delaware
1191
+ 蔚山大学,541,海外名校,University of Ulsan
1192
+ 阿伯里斯特威斯大学,541,海外名校,Aberystwyth University
1193
+ 伊朗科技大学,541,海外名校,Iran University of Science & Technology
1194
+ 庆北国立大学,541,海外名校,Kyungpook National University
1195
+ 新泻大学,541,海外名校,Niigata University
1196
+ 萨班哲大学,541,海外名校,Sabanci University
1197
+ 佐治亚大学,541,海外名校,The University of Georgia
1198
+ 田纳西大学,541,海外名校,"The University of Tennessee, Knoxville"
1199
+ 马萨里克大学,551,海外名校,Masaryk University
1200
+ 中东技术大学,551,海外名校,Middle East Technical University
1201
+ 图卢兹第三大学,551,海外名校,Paul Sabatier University (Toulouse 3)
1202
+ 阿拜哈萨克斯坦国立师范大学,551,海外名校,Abai Kazakh National Pedagogical University
1203
+ 塞萨洛尼基亚里士多德大学,551,海外名校,Aristotle University of Thessaloniki
1204
+ 哈萨克国立农业大学,551,海外名校,Kazakh National Agrarian University KazNAU
1205
+ 塞维利亚大学,551,海外名校,Universidad de Sevilla
1206
+ 泛美大学,551,海外名校,Universidad Panamericana (UP)
1207
+ 米兰圣心天主教大学,551,海外名校,Università Cattolica del Sacro Cuore
1208
+ 塞格德大学,551,海外名校,University of Szeged
1209
+ 帕维亚大学研究学院,561,海外名校,Università degli Studi di Pavia
1210
+ 弗里堡大学,561,海外名校,Université de Fribourg
1211
+ 阿尔泰州立大学,561,海外名校,Altai State University
1212
+ 哈林大学,561,海外名校,Hallym University
1213
+ 仁荷大学,561,海外名校,Inha University
1214
+ 贾瓦哈拉尔尼赫鲁大学,561,海外名校,Jawaharlal Nehru University
1215
+ 布伦瑞克工业大学,561,海外名校,Technical University of Braunschweig
1216
+ 新学院大学,561,海外名校,The New School
1217
+ 大阪市立大学,571,海外名校,Osaka City University
1218
+ 开罗大学,571,海外名校,Cairo University
1219
+ 卡斯里克圣灵大学,571,海外名校,Holy Spirit University of Kaslik
1220
+ 伊万第比利斯国立大学,571,海外名校,Ivane Javakhishvili Tbilisi State University
1221
+ 全北国立大学,571,海外名校,Jeonbuk National University
1222
+ 不来梅大学,571,海外名校,Universität Bremen
1223
+ 瓦伦西亚大学,571,海外名校,Universitat de Valencia
1224
+ 米尼奥大学,571,海外名校,University of Minho
1225
+ 内布拉斯加大学-林肯,571,海外名校,University of Nebraska - Lincoln
1226
+ 拉普拉塔国立大学,581,海外名校,Universidad Nacional de La Plata (UNLP)
1227
+ 圭尔夫大学,581,海外名校,University of Guelph
1228
+ 南佛罗里达大学,581,海外名校,University of South Florida
1229
+ 黎巴嫩美国大学,581,海外名校,Lebanese American University
1230
+ 香港岭南大学,581,海外名校,"Lingnan University, Hong Kong"
1231
+ 默多克大学,581,海外名校,Murdoch University
1232
+ 冈山大学,581,海外名校,Okayama University
1233
+ 萨马拉国立研究大学(Samara University),581,海外名校,Samara National Research University (Samara University)
1234
+ 国立政治大学,591,海外名校,National Chengchi University
1235
+ Savitribai普鲁大学,591,海外名校,Savitribai Phule Pune University
1236
+ 索非亚大学,591,海外名校,Sofia University
1237
+ 哥伦比亚外事大学,591,海外名校,Universidad Externado de Colombia
1238
+ 雷根斯堡大学,591,海外名校,Universität Regensburg
1239
+ 魁北克大学,591,海外名校,Université du Québec
1240
+ 德布勒森大学,591,海外名校,University of Debrecen
1241
+ 卢布尔雅那大学,591,海外名校,University of Ljubljana
1242
+ 巴林应用科学大学,591,海外名校,Applied Science University - Bahrain
1243
+ 比尔肯特大学,591,海外名校,Bilkent University
1244
+ 印度理工学院海德拉巴,591,海外名校,Indian Institute of Technology Hyderabad
1245
+ 熊本大学,591,海外名校,Kumamoto University
1246
+ 阿博大学,601,海外名校,Abo Akademi University
1247
+ 迪拜美国大学,601,海外名校,American University in Dubai
1248
+ 马尼拉雅典大学,601,海外名校,Ateneo de Manila University
1249
+ 班戈大学,601,海外名校,Bangor University
1250
+ 迪拜加拿大大学,601,海外名校,Canadian University Dubai
1251
+ 卡尔顿大学,601,海外名校,Carleton University
1252
+ 中央昆士兰大学,601,海外名校,Central Queensland University
1253
+ 清迈大学,601,海外名校,Chiang Mai University
1254
+ 克拉克大学,601,海外名校,Clark University
1255
+ 科米利亚斯宗座大学,601,海外名校,Comillas Pontifical University
1256
+ 考文垂大学,601,海外名校,Coventry University
1257
+ 岐阜大学,601,海外名校,Gifu University
1258
+ 金泽大学,601,海外名校,Kanazawa University
1259
+ 金斯顿大学,601,海外名校,Kingston University
1260
+ 建国大学,601,海外名校,Konkuk University
1261
+ 墨西哥阿那瓦克大学,601,海外名校,Universidad Anáhuac México
1262
+ 康塞普西翁大学,601,海外名校,Universidad de Concepción
1263
+ 布宜诺斯艾利斯省中央大学 (UNICEN,601,海外名校,Universidad Nacional del Centro de la Provincia de Buenos Aires (UNICEN)
1264
+ 拉蒙鲁尔大学,601,海外名校,Universitat Ramon Llull
1265
+ 霍恩海姆大学,601,海外名校,University Hohenheim
1266
+ 阿威罗大学,601,海外名校,University of Aveiro
1267
+ 辛辛那提大学,601,海外名校,University of Cincinnati
1268
+ 热那亚大学,601,海外名校,University of Genoa
1269
+ 曼尼托巴大学,601,海外名校,University of Manitoba
1270
+ 普利茅斯大学,601,海外名校,University of Plymouth
1271
+ 比勒陀利亚大学,601,海外名校,University of Pretoria
1272
+ 沙迦大学,601,海外名校,University of Sharjah
1273
+ 锡耶纳大学,601,海外名校,University of Siena
1274
+ 南卡罗来纳大学,601,海外名校,University of South Carolina
1275
+ 汉诺威莱布尼兹大学,601,海外名校,Leibniz University Hannover
1276
+ 管理科学大学,601,海外名校,Management and Science University
1277
+ 南京科技大学,601,海外名校,Nanjing University of Science & Technology
1278
+ 帕拉茨基大学,601,海外名校,Palacky University Olomouc
1279
+ Pavol JozefŠafárikUniversity位于科希策,601,海外名校,Pavol Jozef Šafárik University in Košice
1280
+ 釜山国立大学,601,海外名校,Pusan National University
1281
+ S.D.阿斯芬迪亚罗夫哈萨克国立医科大学,601,海外名校,S.D. Asfendiyarov Kazakh National Medical University
1282
+ 世宗大学,601,海外名校,Sejong University
1283
+ 深圳大学,601,海外名校,Shenzhen University
1284
+ 史密斯学院,601,海外名校,Smith College
1285
+ 基辅国立大学,601,海外名校,Taras Shevchenko National University Kiev
1286
+ 法政大学,601,海外名校,Thammasat University
1287
+ 约旦大学,601,海外名校,The University of Jordan
1288
+ 东京农工大学,601,海外名校,Tokyo University of Agriculture and Technology
1289
+ 巴黎第二大学,601,海外名校,University Paris 2 Panthéon-Assas
1290
+ 扎耶德大学,651,海外名校,Zayed University
1291
+ 安蒂奥基亚大学,651,海外名校,Universidad de Antioquia
1292
+ ICESI大学,651,海外名校,Universidad ICESI
1293
+ 秘鲁秘鲁卡耶塔诺大学,651,海外名校,Universidad Peruana Cayetano Heredia
1294
+ 玻利瓦尔纳天主教大学,651,海外名校,Universidad Pontificia Bolivariana
1295
+ 罗斯托克大学,651,海外名校,Universität Rostock
1296
+ 克劳德·伯纳德·里昂大学,651,海外名校,Université Claude Bernard Lyon 1
1297
+ 玛拉工艺大学,651,海外名校,Universiti Teknologi MARA - UiTM
1298
+ 克里特岛大学,651,海外名校,University of Crete
1299
+ 赫尔大学,651,海外名校,University of Hull
1300
+ 海德拉巴大学,651,海外名校,University of Hyderabad
1301
+ 肯塔基大学,651,海外名校,University of Kentucky
1302
+ 马萨诸塞大学波士顿分校,651,海外名校,University of Massachusetts Boston
1303
+ 蒙斯大学,651,海外名校,University of Mons
1304
+ 新不伦瑞克大学,651,海外名校,University of New Brunswick
1305
+ 俄克拉荷马大学,651,海外名校,University of Oklahoma
1306
+ 俄勒冈大学,651,海外名校,University of Oregon
1307
+ 佩奇大学,651,海外名校,University of Pecs
1308
+ 朴茨茅斯大学,651,海外名校,University of Portsmouth
1309
+ 萨拉曼卡大学,651,海外名校,University of Salamanca
1310
+ 的里雅斯特大学,651,海外名校,University of Trieste
1311
+ 佛蒙特大学,651,海外名校,University of Vermont
1312
+ 美利坚大学,651,海外名校,American University
1313
+ 威廉与玛丽学院,651,海外名校,College of William and Mary
1314
+ 布拉迪斯拉发的夸美纽斯大学,651,海外名校,Comenius University in Bratislava
1315
+ 德雷塞尔大学,651,海外名校,Drexel University
1316
+ 伊迪丝科文大学,651,海外名校,Edith Cowan University
1317
+ 罗兰大学,651,海外名校,Eotvos Lorand University
1318
+ 米纳斯吉拉斯州联邦大学,651,海外名校,Universidade Federal de Minas Gerais
1319
+ 博尔扎诺自由大学,651,海外名校,Free University of Bozen-Bolzano
1320
+ 霍华德大学,651,海外名校,Howard University
1321
+ 伊曼纽尔·康德波罗的海联邦大学,651,海外名校,Immanuel Kant Baltic Federal University
1322
+ 马来西亚国际伊斯兰大学,651,海外名校,International Islamic University Malaysia
1323
+ 迦达浦大学,651,海外名校,Jadavpur University
1324
+ 鹿儿岛大学,651,海外名校,Kagoshima University
1325
+ 卡尔·弗朗岑斯大学格拉茨大学,651,海外名校,Karl-Franzens-Universitaet Graz
1326
+ 哈立德国王大学,651,海外名校,King Khalid University
1327
+ 拉合尔管理科学大学,651,海外名校,Lahore University of Management Sciences
1328
+ 洛巴切夫斯基大学,651,海外名校,Lobachevsky University
1329
+ 澳门科技大学,651,海外名校,Macau University of Science & Technology
1330
+ 密歇根理工大学,651,海外名校,Michigan Technological University
1331
+ 雅典大学,651,海外名校,National and Kapodistrian University of Athens
1332
+ 国立中兴大学,651,海外名校,National Chung Hsing University
1333
+ 国立哈尔科夫工业大学,651,海外名校,"National Technical University ""Kharkiv Polytechnic Institute"""
1334
+ 里约热内卢天主教大学,651,海外名校,Pontifícia Universidade Católica do Rio de Janeiro
1335
+ 谢切诺夫大学,651,海外名校,Sechenov University
1336
+ 双威大学,651,海外名校,Sunway University
1337
+ 雪城大学,651,海外名校,Syracuse University
1338
+ 新墨西哥大学-阿尔伯克基,651,海外名校,The University of New Mexico - Albuquerque
1339
+ 德岛大学,651,海外名校,Tokushima University
1340
+ 阿尔斯特大学,651,海外名校,Ulster University
1341
+ 黎巴嫩大学,701,海外名校,Lebanese University
1342
+ 米德尔塞克斯大学,701,海外名校,Middlesex University
1343
+ 乌克兰国立技术大学 – 伊戈尔·西科斯基基辅理工学院,701,海外名校,National Technical University of Ukraine – Igor Sikorsky Kyiv Poly
1344
+ 新泽西理工大学,701,海外名校,New Jersey Institute of Technology
1345
+ 诺桑比亚大学,701,海外名校,Northumbria University
1346
+ 印度金德而全球大学,701,海外名校,O.P. Jindal Global University
1347
+ 大阪府大学,701,海外名校,Osaka Prefecture University
1348
+ 马尔堡菲尔兹大学,701,海外名校,Philipps University of Marburg
1349
+ 普列汉诺夫俄罗斯经济大学,701,海外名校,Plekhanov Russian University of Economics
1350
+ 巴里理工大学,701,海外名校,Politecnico di Bari
1351
+ 穆罕默德·本·法赫德王子大学,701,海外名校,Prince Mohammad Bin Fahd university
1352
+ 罗维拉-威尔吉利大学,701,海外名校,Rovira i Virgili University
1353
+ 圣彼得堡国立电子科技大学,701,海外名校,Saint Petersburg Electrotechnical University ETU-LETI
1354
+ 史蒂文斯理工学院,701,海外名校,Stevens Institute of Technology
1355
+ 苏美州立大学,701,海外名校,Sumy State University
1356
+ 东京都立大学,701,海外名校,Tokyo Metropolitan University
1357
+ 乌法国立航空技术大学,701,海外名校,Ufa State Aviation Technical University
1358
+ 委内瑞拉中央大学,701,海外名校,Universidad Central de Venezuela
1359
+ 萨瓦纳大学,701,海外名校,Universidad de La Sabana
1360
+ 圣安德烈斯大学,701,海外名校,Universidad de San Andrés - UdeSA
1361
+ 伊比利亚美洲大学,701,海外名校,Universidad Iberoamericana IBERO
1362
+ 圣地亚哥德孔波斯特拉大学,701,海外名校,Universidade de Santiago de Compostela
1363
+ 波茨坦大学,701,海外名校,Universität Potsdam
1364
+ 蔚蓝海岸大学,701,海外名校,Université Côte d'Azur
1365
+ 苏塞大学,701,海外名校,Université de Sousse
1366
+ 布拉德福德大学,701,海外名校,University of Bradford
1367
+ 海法大学,701,海外名校,University of Haifa
1368
+ 休斯敦大学,701,海外名校,University of Houston
1369
+ 哈德斯菲尔德大学,701,海外名校,University of Huddersfield
1370
+ 摩德纳大学和雷焦艾米利亚,701,海外名校,University of Modena and Reggio Emilia
1371
+ 南昆士兰大学,701,海外名校,University of Southern Queensland
1372
+ 巴斯克大学,701,海外名校,University of the Basque Country
1373
+ 威斯敏斯特大学,701,海外名校,University of Westminster
1374
+ 温莎大学,701,海外名校,University of Windsor
1375
+ 弗吉尼亚联邦大学,701,海外名校,Virginia Commonwealth University
1376
+ 阿布扎比大学,701,海外名校,Abu Dhabi University
1377
+ 阿治曼大学,701,海外名校,Ajman University
1378
+ 艾因大学,701,海外名校,Al Ain University
1379
+ 海峡大学,701,海外名校,Bogaziçi Üniversitesi
1380
+ 布尔诺工业大学,701,海外名校,Brno University of Technology
1381
+ 查尔斯达尔文大学,701,海外名校,Charles Darwin University
1382
+ 纽约城市大学,701,海外名校,City University of New York
1383
+ 群马大学,701,海外名校,Gunma University
1384
+ 印度理工学院布巴内斯瓦尔,701,海外名校,Indian Institute of Technology Bhubaneswar
1385
+ 布宜诺斯艾利斯技术大学,701,海外名校,Instituto Tecnológico de Buenos Aires (ITBA)
1386
+ 伊斯坦布尔理工大学,701,海外名校,Istanbul Technical University
1387
+ 约夫大学,701,海外名校,Jouf University
1388
+ 阿道夫·伊巴涅斯大学,751,海外名校,Adolfo Ibáñez University
1389
+ 中东美国大学,751,海外名校,American University of the Middle East
1390
+ 白俄罗斯国立技术大学(BNTU),751,海外名校,Belarusian National Technical University (BNTU)
1391
+ 布雷西亚大学,751,海外名校,Brescia University
1392
+ 全南国立大学,751,海外名校,Chonnam National University
1393
+ 忠南国立大学,751,海外名校,Chungnam National University
1394
+ 克拉克森大学,751,海外名校,Clarkson University
1395
+ 檀国大学,751,海外名校,Dankook University
1396
+ 南里奥格兰德联邦大学,751,海外名校,Federal University of Rio Grande do Sul
1397
+ 佐治亚州立大学,751,海外名校,Georgia State University
1398
+ 杜塞尔多夫大学,751,海外名校,Heinrich Heine University Duesseldorf
1399
+ 印尼泗水理工大学,751,海外名校,Institut Teknologi Sepuluh Nopember
1400
+ 国立理工学院 (IPN),751,海外名校,Instituto Politécnico Nacional (IPN)
1401
+ 墨西哥自治技术学院,751,海外名校,Instituto Tecnológico Autónomo de México (ITAM)
1402
+ 印度新德里国立伊斯兰大学,751,海外名校,Jamia Millia Islamia
1403
+ 卡拉干达国立技术大学,751,海外名校,Karaganda State Technical University
1404
+ República大学(乌德拉尔),751,海外名校,Universidad de la República (Udelar)
1405
+ 罗萨里奥大学,751,海外名校,Universidad del Rosario
1406
+ 基多圣弗朗西斯科大学,751,海外名校,Universidad San Francisco de Quito
1407
+ 托拉卡托迪泰拉大学,751,海外名校,Universidad Torcuato Di Tella
1408
+ 佩鲁贾研究大学,751,海外名校,Università degli Studi di Perugia
1409
+ 舍布鲁克大学,751,海外名校,Université de Sherbrooke
1410
+ 国能大学,751,海外名校,Universiti Tenaga Nasional (UNITEN)
1411
+ 中佛罗里达大学,751,海外名校,University of Central Florida
1412
+ 丹佛大学,751,海外名校,University of Denver
1413
+ 格林威治大学,751,海外名校,University of Greenwich
1414
+ 马里兰大学巴尔的摩,751,海外名校,"University of Maryland, Baltimore County"
1415
+ 萨格勒布大学,751,海外名校,University of Zagreb
1416
+ 曼尼珀尔高等教育学院,751,海外名校,Manipal Academy of Higher Education
1417
+ 梅努斯大学,751,海外名校,Maynooth University
1418
+ 纽芬兰纪念大学,751,海外名校,Memorial University of Newfoundland
1419
+ 瓦尔帕莱索天主教大学,751,海外名校,Pontificia Universidad Catolica de Valparaiso
1420
+ 努拉·本·公主阿卜杜勒拉曼大学,751,海外名校,Princess Nourah bint Abdulrahman University
1421
+ 里加技术大学,751,海外名校,Riga Technical University
1422
+ 立命馆大学,751,海外名校,Ritsumeikan University
1423
+ 设拉子大学,751,海外名校,Shiraz University
1424
+ 南十字大学,751,海外名校,Southern Cross University
1425
+ 纽约州立大学奥尔巴尼分校,751,海外名校,State University of New York Albany
1426
+ 塔林科技大学,751,海外名校,Tallinn University of Technology
1427
+ 天普大学,751,海外名校,Temple University
1428
+ 维尔纽斯Gediminas技术大学,751,海外名校,Vilnius Gediminas Technical University
1429
+ 伍斯特理工学院,751,海外名校,Worcester Polytechnic Institute
1430
+ 越南国立大学胡志明市,801,海外名校,Viet Nam National University Ho Chi Minh City (VNU-HCM)
1431
+ 越南河内国立大学,801,海外名校,Vietnam National University Hanoi
1432
+ 维陶塔斯·马格努斯大学,801,海外名校,Vytautas Magnus University
1433
+ 西弗吉尼亚大学,801,海外名校,West Virginia University
1434
+ 弗罗茨瓦夫科技大学,801,海外名校,Wroclaw University of Science and Technology
1435
+ 西交利物浦大学,801,海外名校,Xi‘an Jiaotong-Liverpool University
1436
+ 山口大学,801,海外名校,Yamaguchi University
1437
+ 埃里温州立大学,801,海外名校,Yerevan State University
1438
+ 岭南大学,801,海外名校,Yeungnam University
1439
+ 横滨国立大学,801,海外名校,Yokohama National University
1440
+ 查平戈自治大学,801,海外名校,Universidad Autónoma de Chapingo
1441
+ 伊达尔戈自治大学,801,海外名校,Universidad Autónoma del Estado de Hidalgo
1442
+ 墨西哥自治大学,801,海外名校,Universidad Autonoma del Estado de Mexico
1443
+ 大都会大学(UAM),801,海外名校,Universidad Autónoma Metropolitana (UAM)
1444
+ 卡塔利卡大学安德烈斯·贝洛大学,801,海外名校,Universidad Católica Andres Bello
1445
+ 乌拉圭卡托利卡大学(UCU),801,海外名校,Universidad Católica del Uruguay (UCU)
1446
+ 瓜达拉哈拉大学,801,海外名校,Universidad de Guadalajara
1447
+ 美洲普埃布拉大学(UDLAP),801,海外名校,Universidad de las Américas Puebla (UDLAP)
1448
+ 洛斯安第斯大学-(ULA)梅里达,801,海外名校,Universidad de Los Andes - (ULA) Mérida
1449
+ 洛斯安第斯大学-智利,801,海外名校,Universidad de los Andes - Chile
1450
+ 奥维耶多大学,801,海外名校,Universidad de Oviedo
1451
+ 瓦莱大学,801,海外名校,Universidad del Valle
1452
+ 圣地亚哥大学门户网站,801,海外名校,University Diego Portales
1453
+ EAFIT大学,801,海外名校,Universidad EAFIT
1454
+ 国立圣路易斯大学,801,海外名校,Universidad Nacional de San Luis
1455
+ 圣马科斯国立大学市长,801,海外名校,Universidad Nacional Mayor de San Marcos
1456
+ 西蒙玻利瓦尔大学(USB),801,海外名校,Universidad Simón Bolívar (USB)
1457
+ 巴拿马巴拿马技术大学(UTP),801,海外名校,Universidad Tecnológica de Panamá (UTP)
1458
+ 全国技术大学(UTN),801,海外名校,Universidad Tecnológica Nacional (UTN)
1459
+ 卡图里卡葡萄牙大学-UCP,801,海外名校,Universidade Católica Portuguesa - UCP
1460
+ 拉科鲁尼亚大学,801,海外名校,Universidade da Coruna
1461
+ 巴西利亚大学,801,海外名校,Universidade de Brasília
1462
+ 费拉拉大学,801,海外名校,Universita' degli Studi di Ferrara
1463
+ 乌迪内大学研究,801,海外名校,Università degli Studi di Udine
1464
+ 意大利马尔凯理工大学,801,海外名校,Universita' Politecnica delle Marche
1465
+ 帕贾扎兰大学,801,海外名校,Universitas Padjadjaran
1466
+ 杜伊斯堡 - 埃森大学,801,海外名校,University of Duisburg-Essen
1467
+ 里尔大学,801,海外名校,Université de Lille
1468
+ 洛林大学,801,海外名校,Universite de Lorraine
1469
+ 南特大学,801,海外名校,Université de Nantes
1470
+ 雷恩第一大学,801,海外名校,Université de Rennes 1
1471
+ 图卢兹大学1 Capitole,801,海外名校,Université Toulouse 1 Capitole
1472
+ 吉隆坡大学,801,海外名校,Universiti Kuala Lumpur
1473
+ 马来西亚彭亨大学,801,海外名校,Universiti Malaysia Pahang
1474
+ 马来西亚玻璃市大学,801,海外名校,Universiti Malaysia Perlis
1475
+ 拉曼大学,801,海外名校,Universiti Tunku Abdul Rahman (UTAR)
1476
+ 阿利坎特大学,801,海外名校,University of Alicante
1477
+ 巴格达大学,801,海外名校,University of Baghdad
1478
+ 巴林大学,801,海外名校,University of Bahrain
1479
+ 巴里大学,801,海外名校,University of Bari
1480
+ 布莱顿大学,801,海外名校,University of Brighton
1481
+ 加尔各答大学,801,海外名校,University of Calcutta
1482
+ 中央兰开夏大学,801,海外名校,University of Central Lancashire
1483
+ 达卡大学,801,海外名校,University of Dhaka
1484
+ 东伦敦大学,801,海外名校,University of East London
1485
+ 拉合尔工程技术大学(UET),801,海外名校,University of Engineering & Technology (UET) Lahore
1486
+ 格但斯克大学,801,海外名校,University of Gdansk
1487
+ 哈特福德大学,801,海外名校,University of Hartford
1488
+ 赫特福德大学,801,海外名校,University of Hertfordshire
1489
+ 赫拉德茨克拉洛夫大学,801,海外名校,University of Hradec Kralove
1490
+ 夸祖鲁纳塔尔大学,801,海外名校,University of KwaZulu Natal
1491
+ 罗兹大学,801,海外名校,University of Lodz
1492
+ 马耳他大学,801,海外名校,University of Malta
1493
+ 马里博尔大学,801,海外名校,University of Maribor
1494
+ 墨西拿大学,801,海外名校,University of Messina
1495
+ 密西西比大学,801,海外名校,University of Mississippi
1496
+ 密苏里大学堪萨斯城分校,801,海外名校,"University of Missouri, Kansas City"
1497
+ 穆尔西亚大学,801,海外名校,University of Murcia
1498
+ 新英格兰大学,801,海外名校,University of New England
1499
+ 新罕布什尔大学,801,海外名校,University of New Hampshire
1500
+ 帕尔马大学,801,海外名校,University of Parma
1501
+ 帕特雷大学,801,海外名校,University of Patras
1502
+ 萨勒诺大学,801,海外名校,University of Salerno
1503
+ 索尔福德大学,801,海外名校,University of Salford
1504
+ 首尔大学,801,海外名校,University of Seoul
1505
+ 旁遮普大学,801,海外名校,University of the Punjab
1506
+ 英格兰西部大学,801,海外名校,University of the West of England
1507
+ 塔尔萨大学,801,海外名校,University of Tulsa
1508
+ 秋明大学,801,海外名校,University of Tyumen
1509
+ 维罗纳大学,801,海外名校,University of Verona
1510
+ 威斯康星大学-密尔沃基,801,海外名校,University of Wisconsin - Milwaukee
1511
+ 弗罗茨瓦夫大学,801,海外名校,University of Wroclaw
1512
+ 怀俄明大学,801,海外名校,University of Wyoming
1513
+ 齐里纳大学,801,海外名校,University of Žilina
1514
+ Y.A.院士布克托夫卡拉干达大学,801,海外名校,Academician Y.A. Buketov Karaganda University
1515
+ 亚当·米基维奇大学,801,海外名校,Adam Mickiewicz University
1516
+ AGH科技大学,801,海外名校,AGH University of Science & Technology
1517
+ 艾恩夏姆斯大学,801,海外名校,Ain Shams University
1518
+ 安卡拉大学,801,海外名校,Ankara Üniversitesi
1519
+ 安娜大学,801,海外名校,Anna University
1520
+ 奥本大学,801,海外名校,Auburn University
1521
+ 澳洲天主教大学,801,海外名校,Australian Catholic University
1522
+ 孟加拉国工程技术大学,801,海外名校,Bangladesh University of Engineering and Technology
1523
+ 贝鲁特阿拉伯大学,801,海外名校,Beirut Arab University
1524
+ 比勒费尔德大学,801,海外名校,Bielefeld University
1525
+ 纽约州立大学宾汉姆顿大学,801,海外名校,Binghamton University
1526
+ 布达佩斯科技经济大学,801,海外名校,Budapest University of Technology & Economics
1527
+ 威尼斯福斯卡里宫大学,801,海外名校,Ca’ Foscari University of Venice
1528
+ 卡塔尼亚大学,801,海外名校,Catania University
1529
+ 查尔斯斯特大学,801,海外名校,Charles Sturt University
1530
+ 克莱姆森大学,801,海外名校,Clemson University
1531
+ 布达佩斯科维努斯大学,801,海外名校,Corvinus University of Budapest
1532
+ 克拉科夫工业大学,801,海外名校,Cracow University of Technology
1533
+ CY塞尔吉巴黎大学,801,海外名校,CY Cergy Paris Université
1534
+ 布拉格捷克生命科学大学,801,海外名校,Czech University of Life Sciences in Prague
1535
+ 德拉萨大学,801,海外名校,De La Salle University
1536
+ 德蒙福特大学,801,海外名校,De Montfort University
1537
+ 多特蒙德工业大学,801,海外名校,Technical University of Dortmund
1538
+ 爱丁堡纳皮尔大学,801,海外名校,Edinburgh Napier University
1539
+ 巴拉那联邦大学,801,海外名校,Federal University of Parana
1540
+ 伯南布哥联邦大学,801,海外名校,Federal University of Pernambuco
1541
+ 圣卡塔琳娜联邦大学,801,海外名校,Federal University of Santa Catarina
1542
+ 圣卡洛斯联邦大学,801,海外名校,Federal University of Sao Carlos
1543
+ 费德里科圣玛丽亚技术大学,801,海外名校,Federico Santa María Technical University
1544
+ 俄罗斯联邦政府金融大学,801,海外名校,Financial University under the Government of the Russian Federation
1545
+ 佛罗里达国际大学,801,海外名校,Florida International University
1546
+ 福特汉姆大学,801,海外名校,Fordham University
1547
+ 格但斯克工业大学,801,海外名校,Gdansk University of Technology
1548
+ 乔治梅森大学,801,海外名校,George Mason University
1549
+ 德国约旦大学,801,海外名校,German Jordanian University
1550
+ 海湾科技大学,801,海外名校,Gulf University for Science and Technology
1551
+ 哈斯特帕大学,801,海外名校,Hacettepe University
1552
+ 印第安纳大学-印第安纳波利斯,801,海外名校,Indiana University-Purdue University at Indianapolis
1553
+ 国际基督教大学,801,海外名校,International Christian University
1554
+ 麦地那伊斯兰大学,801,海外名校,Islamic University of Madinah
1555
+ 伊斯坦布尔大学,801,海外名校,Istanbul University
1556
+ 约旦科技大学,801,海外名校,Jordan University of Science & Technology
1557
+ 堪萨斯州立大学,801,海外名校,Kansas State University
1558
+ 卡塞萨特大学,801,海外名校,Kasetsart University
1559
+ 考纳斯工业大学,801,海外名校,Kaunas University of Technology
1560
+ 哈萨克阿布赖汗国际关系与世界语言大学,801,海外名校,Kazakh Ablai Khan University of International Relations and World Languages
1561
+ 哈萨克斯坦-英国技术大学,801,海外名校,Kazakh-British Technical University
1562
+ 喀山国立研究技术大学,801,海外名校,Kazan National Research Technical University
1563
+ 孔敬大学,801,海外名校,Khon Kaen University
1564
+ 费萨尔国王大学,801,海外名校,King Faisal University
1565
+ 泰国国王科技大学,801,海外名校,King Mongkut’s University of Technology Thonburi
1566
+ 京都工业大学,801,海外名校,Kyoto Institute of Technology
1567
+ 九州工业大学,801,海外名校,Kyushu Institute of Technology
1568
+ 利物浦约翰摩尔斯大学,801,海外名校,Liverpool John Moores University
1569
+ 罗兹工业大学,801,海外名校,Lodz University of Technology
1570
+ 伦敦都会大学,801,海外名校,London Metropolitan University
1571
+ 伦敦南岸大学,801,海外名校,London South Bank University
1572
+ 路易斯安那州立大学,801,海外名校,Louisiana State University
1573
+ 芝加哥洛约拉大学,801,海外名校,Loyola University Chicago
1574
+ 利沃夫理工大学,801,海外名校,Lviv Polytechnic National University
1575
+ 曼彻斯特城市大学,801,海外名校,Manchester Metropolitan University
1576
+ 布尔诺的孟德尔大学,801,海外名校,Mendel University Brno
1577
+ 门捷列夫化工大学,801,海外名校,Mendeleev University of Chemical Technology
1578
+ 国立中正大学,801,海外名校,National Chung Cheng University
1579
+ 科尔多瓦国立大学,801,海外名校,National University of Cordoba
1580
+ 罗萨里奥国立大学,801,海外名校,National University of Rosario
1581
+ 尼古拉·哥白尼大学,801,海外名校,Nicolaus Copernicus University
1582
+ NJSC KIMEP大学,801,海外名校,NJSC KIMEP University
1583
+ 诺丁汉特伦特大学,801,海外名校,Nottingham Trent University
1584
+ 新西伯利亚国立技术大学,801,海外名校,Novosibirsk State Technical University
1585
+ 俄克拉荷马州立大学,801,海外名校,Oklahoma State University
1586
+ 萨尔茨堡巴黎罗德隆大学,801,海外名校,Paris Lodron University of Salzburg
1587
+ 那不勒斯帕斯诺普大学,801,海外名校,Parthenope University Naples
1588
+ 彼尔姆国立研究大学,801,海外名校,Perm State National Research University
1589
+ 本地治里大学,801,海外名校,Pondicherry University
1590
+ 圣保罗天主教大学,801,海外名校,Pontifícia Universidade Católica de São Paulo
1591
+ 波兹南理工大学,801,海外名校,Poznan University of Technology
1592
+ 宋卡王子大学,801,海外名校,Prince of Songkla University
1593
+ 苏美亚公主科技大学,801,海外名校,Princess Sumaya University for Technology
1594
+ 卡西姆大学,801,海外名校,Qassim University
1595
+ 英国爱丁堡玛格丽特女王大学,801,海外名校,"Queen Margaret University , Edinburgh"
1596
+ 罗得大学,801,海外名校,Rhodes University
1597
+ 里加斯特拉丁斯大学,801,海外名校,Riga Stradins University
1598
+ 罗伯特·戈登大学,801,海外名校,Robert Gordon University
1599
+ 罗马特雷大学,801,海外名校,Roma Tre University
1600
+ 俄罗斯总统国民经济与公共行政学院,801,海外名校,Russian Presidential Academy of National Economy and Public Administration
1601
+ 罗格斯大学纽瓦克分校,801,海外名校,Rutgers University - Newark
1602
+ 瑞尔森大学,801,海外名校,Ryerson University
1603
+ 西雅图大学,801,海外名校,Seattle University
1604
+ 信州大学,801,海外名校,Shinshu University
1605
+ Siksha'O'Anusandhan,801,海外名校,Siksha ‘O’ Anusandhan
1606
+ 西里西亚工业大学,801,海外名校,Silesian University of Technology
1607
+ 斯洛伐克工业大学布拉迪斯拉发,801,海外名校,Slovak University of Technology Bratislava
1608
+ 上智大学,801,海外名校,Sophia University
1609
+ 南乌拉尔州立大学,801,海外名校,South Ural State University
1610
+ 南卫理公会大学,801,海外名校,Southern Methodist University
1611
+ 塞切尼伊斯特万大学,801,海外名校,Széchenyi István University
1612
+ 圣伊斯特万大学,801,海外名校,Szent Istvan University
1613
+ 科希策技术大学,801,海外名校,Technical University of Kosice
1614
+ 利贝雷茨技术大学,801,海外名校,Technical University of Liberec
1615
+ 都柏林科技大学,801,海外名校,Technological University Dublin
1616
+ 哥斯达黎加技术中心(TECN),801,海外名校,Tecnológico de Costa Rica -TEC
1617
+ 德克萨斯理工大学,801,海外名校,Texas Tech University
1618
+ 阿拉巴马大学,801,海外名校,The University of Alabama
1619
+ 阿尔伯塔大学,126,海外名校,
1620
+ 加州艺术学院,1,海外名校,California Institute of the Arts
1621
+ 萨凡纳艺术学院,2,海外名校,Savannah College of Art and Design
1622
+ 瑞格林艺术设计学院,3,海外名校,Ringling College of Art and Design
1623
+ 纽约视觉艺术学院,4,海外名校,School of Visual Art
1624
+ 谢尔丹学院,6,海外名校,Sheridan College
1625
+ 伯恩茅斯大学,7,海外名校,Bournemouth University
1626
+ 提赛德大学,8,海外名校,Teesside University
1627
+ 高布兰学院,9,海外名校,Gobelins
deepdoc/parser/resume/entities/res/schools.csv ADDED
The diff for this file is too large to render. See raw diff
 
deepdoc/parser/resume/entities/schools.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: UTF-8 -*-
2
+ import os, json,re,copy
3
+ import pandas as pd
4
# Directory holding this module; the resource files are resolved relative to it.
current_file_path = os.path.dirname(os.path.abspath(__file__))

# School table: tab-separated with a header row; missing cells become "".
TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("")
# English names are normalised once so lookups can compare lower-cased text.
TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())

# Load the "good school" names with an explicit encoding and close the file
# deterministically (the rank file below is read as UTF-8 as well).
with open(os.path.join(current_file_path, "res/good_sch.json"), "r", encoding="utf-8") as _good_sch_file:
    GOOD_SCH = json.load(_good_sch_file)
# Stored with commas, dots, spaces, ampersands and parentheses removed.
GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
9
+
10
+
11
def loadRank(fnm):
    """Fill the ``rank`` column of the module-level ``TBL`` from a CSV file.

    Every row of ``TBL`` first gets the default rank 1000000. Each line of
    *fnm* is split on commas; the first field is a school name and the second
    its integer rank. Rows of ``TBL`` whose ``name_cn`` or ``name_en`` equals
    that name receive the rank.

    Lines with fewer than two fields or a non-integer rank (for example a
    header line) are skipped.
    """
    global TBL
    TBL["rank"] = 1000000
    with open(fnm, "r", encoding='UTF-8') as f:
        for line in f:
            cols = line.strip("\n").split(",")
            try:
                nm, rk = cols[0].strip(), int(cols[1])
            except (IndexError, ValueError):
                # Malformed or header line: nothing to record.
                continue
            TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
25
+
26
+
27
+ loadRank(os.path.join(current_file_path, "res/school.rank.csv"))
28
+
29
+
30
def split(txt):
    """Split *txt* on runs of spaces/tabs, keeping Latin-letter phrases together.

    Consecutive tokens are re-joined with a single space when the previous
    token ends with an ASCII letter and the next one starts with an ASCII
    letter, so "Peking University 北京" becomes
    ["Peking University", "北京"].

    Surrounding whitespace is stripped first; otherwise a leading blank would
    yield an empty first token, which is the one ``select`` looks up.
    """
    tks = []
    for t in re.sub(r"[ \t]+", " ", txt.strip()).split(" "):
        if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and re.match(r"[a-zA-Z]", t):
            tks[-1] = tks[-1] + " " + t
        else:
            tks.append(t)
    return tks
38
+
39
+
40
def select(nm):
    """Look up a school in ``TBL`` and return its first matching row as a dict.

    *nm* may be a string or a list (only its first element is used). The
    name is reduced to its first token from ``split``, lower-cased, and
    stripped of parenthesised remarks, a leading "the ", punctuation and a
    leading country prefix before being compared with ``name_cn``,
    ``name_en`` and the "+"-separated ``alias`` column.

    Returns ``None`` when *nm* is empty or nothing matches. The returned
    record also carries a boolean ``hit_alias`` field.
    """
    if not nm:
        return
    if isinstance(nm, list):
        nm = str(nm[0])
    nm = split(nm)[0]
    nm = str(nm).lower().strip()
    nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
    nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
    nm = re.sub(r"大学.*学院", "大学", nm)

    # Compute the alias mask against the shared table instead of deep-copying
    # the whole table on every lookup; only the matched rows are copied.
    hit_alias = TBL["alias"].map(lambda x: nm in set(x.split("+")))
    matched = (TBL.name_cn == nm) | (TBL.name_en == nm) | hit_alias
    if not matched.any():
        return

    res = TBL[matched].copy()
    res["hit_alias"] = hit_alias[matched]
    return json.loads(res.to_json(orient="records"))[0]
55
+
56
+
57
def is_good(nm):
    """Return True when the normalised school name is a member of ``GOOD_SCH``."""
    # Lower-case the name and drop any parenthesised remark.
    without_remark = re.sub(r"[((][^()()]+[))]", "", nm.lower())
    # Remove quotes, punctuation and spaces before the set lookup.
    lookup_key = re.sub(r"[''`‘’“”,. &()();;]+", "", without_remark)
    return lookup_key in GOOD_SCH
62
+
deepdoc/parser/resume/step_one.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import json
3
+ from deepdoc.parser.resume.entities import degrees, regions, industries
4
+
5
+ FIELDS = [
6
+ "address STRING",
7
+ "annual_salary int",
8
+ "annual_salary_from int",
9
+ "annual_salary_to int",
10
+ "birth STRING",
11
+ "card STRING",
12
+ "certificate_obj string",
13
+ "city STRING",
14
+ "corporation_id int",
15
+ "corporation_name STRING",
16
+ "corporation_type STRING",
17
+ "degree STRING",
18
+ "discipline_name STRING",
19
+ "education_obj string",
20
+ "email STRING",
21
+ "expect_annual_salary int",
22
+ "expect_city_names string",
23
+ "expect_industry_name STRING",
24
+ "expect_position_name STRING",
25
+ "expect_salary_from int",
26
+ "expect_salary_to int",
27
+ "expect_type STRING",
28
+ "gender STRING",
29
+ "industry_name STRING",
30
+ "industry_names STRING",
31
+ "is_deleted STRING",
32
+ "is_fertility STRING",
33
+ "is_house STRING",
34
+ "is_management_experience STRING",
35
+ "is_marital STRING",
36
+ "is_oversea STRING",
37
+ "language_obj string",
38
+ "name STRING",
39
+ "nation STRING",
40
+ "phone STRING",
41
+ "political_status STRING",
42
+ "position_name STRING",
43
+ "project_obj string",
44
+ "responsibilities string",
45
+ "salary_month int",
46
+ "scale STRING",
47
+ "school_name STRING",
48
+ "self_remark string",
49
+ "skill_obj string",
50
+ "title_name STRING",
51
+ "tob_resume_id STRING",
52
+ "updated_at Timestamp",
53
+ "wechat STRING",
54
+ "work_obj string",
55
+ "work_experience int",
56
+ "work_start_time BIGINT"
57
+ ]
58
+
59
+ def refactor(df):
60
+ def deal_obj(obj, k, kk):
61
+ if not isinstance(obj, type({})):
62
+ return ""
63
+ obj = obj.get(k, {})
64
+ if not isinstance(obj, type({})):
65
+ return ""
66
+ return obj.get(kk, "")
67
+
68
+ def loadjson(line):
69
+ try:
70
+ return json.loads(line)
71
+ except Exception as e:
72
+ pass
73
+ return {}
74
+
75
+ df["obj"] = df["resume_content"].map(lambda x: loadjson(x))
76
+ df.fillna("", inplace=True)
77
+
78
+ clms = ["tob_resume_id", "updated_at"]
79
+
80
+ def extract(nms, cc=None):
81
+ nonlocal clms
82
+ clms.extend(nms)
83
+ for c in nms:
84
+ if cc:
85
+ df[c] = df["obj"].map(lambda x: deal_obj(x, cc, c))
86
+ else:
87
+ df[c] = df["obj"].map(
88
+ lambda x: json.dumps(
89
+ x.get(
90
+ c,
91
+ {}),
92
+ ensure_ascii=False) if isinstance(
93
+ x,
94
+ type(
95
+ {})) and (
96
+ isinstance(
97
+ x.get(c),
98
+ type(
99
+ {})) or not x.get(c)) else str(x).replace(
100
+ "None",
101
+ ""))
102
+
103
+ extract(["education", "work", "certificate", "project", "language",
104
+ "skill"])
105
+ extract(["wechat", "phone", "is_deleted",
106
+ "name", "tel", "email"], "contact")
107
+ extract(["nation", "expect_industry_name", "salary_month",
108
+ "industry_ids", "is_house", "birth", "annual_salary_from",
109
+ "annual_salary_to", "card",
110
+ "expect_salary_to", "expect_salary_from",
111
+ "expect_position_name", "gender", "city",
112
+ "is_fertility", "expect_city_names",
113
+ "political_status", "title_name", "expect_annual_salary",
114
+ "industry_name", "address", "position_name", "school_name",
115
+ "corporation_id",
116
+ "is_oversea", "responsibilities",
117
+ "work_start_time", "degree", "management_experience",
118
+ "expect_type", "corporation_type", "scale", "corporation_name",
119
+ "self_remark", "annual_salary", "work_experience",
120
+ "discipline_name", "marital", "updated_at"], "basic")
121
+
122
+ df["degree"] = df["degree"].map(lambda x: degrees.get_name(x))
123
+ df["address"] = df["address"].map(lambda x: " ".join(regions.get_names(x)))
124
+ df["industry_names"] = df["industry_ids"].map(lambda x: " ".join([" ".join(industries.get_names(i)) for i in
125
+ str(x).split(",")]))
126
+ clms.append("industry_names")
127
+
128
+ def arr2str(a):
129
+ if not a:
130
+ return ""
131
+ if isinstance(a, list):
132
+ a = " ".join([str(i) for i in a])
133
+ return str(a).replace(",", " ")
134
+
135
+ df["expect_industry_name"] = df["expect_industry_name"].map(
136
+ lambda x: arr2str(x))
137
+ df["gender"] = df["gender"].map(
138
+ lambda x: "男" if x == 'M' else (
139
+ "女" if x == 'F' else ""))
140
+ for c in ["is_fertility", "is_oversea", "is_house",
141
+ "management_experience", "marital"]:
142
+ df[c] = df[c].map(
143
+ lambda x: '是' if x == 'Y' else (
144
+ '否' if x == 'N' else ""))
145
+ df["is_management_experience"] = df["management_experience"]
146
+ df["is_marital"] = df["marital"]
147
+ clms.extend(["is_management_experience", "is_marital"])
148
+
149
+ df.fillna("", inplace=True)
150
+ for i in range(len(df)):
151
+ if not df.loc[i, "phone"].strip() and df.loc[i, "tel"].strip():
152
+ df.loc[i, "phone"] = df.loc[i, "tel"].strip()
153
+
154
+ for n in ["industry_ids", "management_experience", "marital", "tel"]:
155
+ for i in range(len(clms)):
156
+ if clms[i] == n:
157
+ del clms[i]
158
+ break
159
+
160
+ clms = list(set(clms))
161
+
162
+ df = df.reindex(sorted(clms), axis=1)
163
+ #print(json.dumps(list(df.columns.values)), "LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL")
164
+ for c in clms:
165
+ df[c] = df[c].map(
166
+ lambda s: str(s).replace(
167
+ "\t",
168
+ " ").replace(
169
+ "\n",
170
+ "\\n").replace(
171
+ "\r",
172
+ "\\n"))
173
+ # print(df.values.tolist())
174
+ return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0]))
deepdoc/parser/resume/step_two.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import re, copy, time, datetime, demjson, \
3
+ traceback, signal
4
+ import numpy as np
5
+ from deepdoc.parser.resume.entities import degrees, schools, corporations
6
+ from rag.nlp import huqie, surname
7
+ from xpinyin import Pinyin
8
+ from contextlib import contextmanager
9
+
10
+
11
class TimeoutException(Exception):
    """Raised by the SIGALRM handler that ``time_limit`` installs."""
    pass
12
+
13
+
14
@contextmanager
def time_limit(seconds):
    """Context manager that raises ``TimeoutException`` after *seconds*.

    A SIGALRM handler is installed and an alarm armed for the duration of the
    ``with`` body. On exit the alarm is cancelled and the handler that was
    active beforehand is restored, so the override does not leak to the rest
    of the process.
    """
    def signal_handler(signum, frame):
        raise TimeoutException("Timed out!")

    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        # signal.signal() returns None when the previous handler was not
        # installed from Python; there is nothing to restore in that case.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
25
+
26
+
27
# NOTE(review): ENV is not referenced in the visible part of this module.
ENV = None
# Single xpinyin converter instance, created once at import time.
PY = Pinyin()
29
+
30
+
31
def rmHtmlTag(line):
    """Replace simple HTML-like tags in *line* with a single space.

    A tag is ``<...>`` whose content consists only of letters, digits and the
    characters ``. " = ' ; , : + _ /``, space and ``-``, matched
    case-insensitively. At most 100000 tags are replaced.
    """
    # count/flags are passed by keyword: passing them positionally is
    # deprecated since Python 3.13.
    return re.sub(r"<[a-z0-9.\"=';,:\+_/ -]+>", " ", line, count=100000, flags=re.IGNORECASE)
33
+
34
+
35
def highest_degree(dg):
    """Return the highest-ranked degree name in *dg*.

    *dg* may be a single string or an iterable of strings. Names outside the
    ranking table rank lowest (-1). On ties the earliest entry wins. An empty
    or missing *dg* yields "".
    """
    if not dg:
        return ""
    if isinstance(dg, str):
        dg = [dg]
    m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
    # max() returns the first maximal element, the same winner as a stable
    # descending sort followed by taking the first entry.
    return max(dg, key=lambda d: m.get(d, -1))
40
+
41
+
42
def forEdu(cv):
    """Derive education features from ``cv["education_obj"]`` and store them in *cv*.

    When there is no education data, ``cv["integerity_flt"]`` is multiplied by
    0.8 and *cv* is returned unchanged otherwise. Else the records are walked
    in ascending ``start_time`` order to collect schools, majors, degrees,
    start/end years, school feature tags and the best school rank. Keyword
    (``*_kwd``), tokenised (``*_tks``) and numeric fields are then written
    into *cv*, which is returned.
    """
    if not cv.get("education_obj"):
        cv["integerity_flt"] *= 0.8
        return cv

    first_fea, fea, maj, fmaj, deg, fdeg, sch, fsch, st_dt, ed_dt = [], [], [], [], [], [], [], [], [], []
    edu_nst = []
    edu_end_dt = ""
    cv["school_rank_int"] = 1000000
    for n in sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3")):
        # Per-record nested document. It must stay bound for the whole
        # iteration, so the except clauses below do not bind their exception
        # to a name: `except ... as e` would unbind `e` after the handler and
        # the later `e[...]` / `edu_nst.append(e)` would then fail.
        e = {}
        if n.get("end_time"):
            if n["end_time"] > edu_end_dt:
                edu_end_dt = n["end_time"]
            try:
                dt = n["end_time"]
                if re.match(r"[0-9]{9,}", dt):
                    dt = turnTm2Dt(dt)
                y, m, d = getYMD(dt)
                ed_dt.append(str(y))
                e["end_dt_kwd"] = str(y)
            except Exception:
                # Best effort: an unparsable end date is ignored.
                pass
        if n.get("start_time"):
            try:
                dt = n["start_time"]
                if re.match(r"[0-9]{9,}", dt):
                    dt = turnTm2Dt(dt)
                y, m, d = getYMD(dt)
                st_dt.append(str(y))
                e["start_dt_kwd"] = str(y)
            except Exception:
                # Best effort: an unparsable start date is ignored.
                pass

        r = schools.select(n.get("school_name", ""))
        if r:
            # NOTE(review): both type "1" and type "2" map to "211" -- confirm.
            if str(r.get("type", "")) == "1":
                fea.append("211")
            if str(r.get("type", "")) == "2":
                fea.append("211")
            if str(r.get("is_abroad", "")) == "1":
                fea.append("留学")
            if str(r.get("is_double_first", "")) == "1":
                fea.append("双一流")
            if str(r.get("is_985", "")) == "1":
                fea.append("985")
            if str(r.get("is_world_known", "")) == "1":
                fea.append("海外知名")
            if r.get("rank") and cv["school_rank_int"] > r["rank"]:
                cv["school_rank_int"] = r["rank"]

        if n.get("school_name") and isinstance(n["school_name"], str):
            sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
            e["sch_nm_kwd"] = sch[-1]
        # The last fine-grained token of the school name is kept as a feature.
        fea.append(huqie.qieqie(huqie.qie(n.get("school_name", ""))).split(" ")[-1])

        if n.get("discipline_name") and isinstance(n["discipline_name"], str):
            maj.append(n["discipline_name"])
            e["major_kwd"] = n["discipline_name"]

        # Missing degree at a 985 school before any first degree was seen:
        # default the degree code to "1".
        if not n.get("degree") and "985" in fea and not first_fea:
            n["degree"] = "1"

        if n.get("degree"):
            d = degrees.get_name(n["degree"])
            if d:
                e["degree_kwd"] = d
            # A bachelor's degree after a lower degree, or from an
            # adult/self-study school, is recorded as "专升本".
            if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg
                              or re.search(r"(成人|自考|自学考试)", n.get("school_name", ""))):
                d = "专升本"
            if d:
                deg.append(d)

            # for first degree
            if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
                fdeg = [d]
                if n.get("school_name"):
                    fsch = [n["school_name"]]
                if n.get("discipline_name"):
                    fmaj = [n["discipline_name"]]
                first_fea = copy.deepcopy(fea)

        edu_nst.append(e)

    # NOTE(review): the loop above appends "海外知名", never "海外名校", so the
    # "海外名校" tests below only fire if the tokeniser yields that token -- confirm.
    cv["sch_rank_kwd"] = []
    if cv["school_rank_int"] <= 20 \
            or ("海外名校" in fea and cv["school_rank_int"] <= 200):
        cv["sch_rank_kwd"].append("顶尖学校")
    elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \
            or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \
                cv["school_rank_int"] > 200):
        cv["sch_rank_kwd"].append("精英学校")
    elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \
            or ("海外名校" in fea and cv["school_rank_int"] > 500):
        cv["sch_rank_kwd"].append("优质学校")
    else:
        cv["sch_rank_kwd"].append("一般学校")

    if edu_nst:
        cv["edu_nst"] = edu_nst
    if fea:
        cv["edu_fea_kwd"] = list(set(fea))
    if first_fea:
        cv["edu_first_fea_kwd"] = list(set(first_fea))
    if maj:
        cv["major_kwd"] = maj
    if fsch:
        cv["first_school_name_kwd"] = fsch
    if fdeg:
        cv["first_degree_kwd"] = fdeg
    if fmaj:
        cv["first_major_kwd"] = fmaj
    if st_dt:
        cv["edu_start_kwd"] = st_dt
    if ed_dt:
        cv["edu_end_kwd"] = ed_dt
    if ed_dt:
        cv["edu_end_int"] = max([int(t) for t in ed_dt])
    if deg:
        if "本科" in deg and "专科" in deg:
            deg.append("专升本")
            deg = [d for d in deg if d != '本科']
        cv["degree_kwd"] = deg
        cv["highest_degree_kwd"] = highest_degree(deg)
    if edu_end_dt:
        try:
            if re.match(r"[0-9]{9,}", edu_end_dt):
                edu_end_dt = turnTm2Dt(edu_end_dt)
            if edu_end_dt.strip("\n") == "至今":
                edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
            y, m, d = getYMD(edu_end_dt)
            # Years since the latest education end date, capped by any value
            # already present.
            cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
        except Exception as err:
            print("EXCEPTION: ", err, edu_end_dt, cv.get("work_exp_flt"))
    if sch:
        cv["school_name_kwd"] = sch
        if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
                or all([c.lower() in ["硕士", "博士", "mba", "博士后"] for c in cv.get("degree_kwd", [])]) \
                or not cv.get("degree_kwd"):
            for c in sch:
                if schools.is_good(c):
                    if "tag_kwd" not in cv:
                        cv["tag_kwd"] = []
                    cv["tag_kwd"].append("好学校")
                    cv["tag_kwd"].append("好学历")
                    break
        # NOTE(review): "博士" is listed twice in the any() list below -- confirm.
        if (len(cv.get("degree_kwd", [])) >= 1 and \
            "本科" in cv["degree_kwd"] and \
            any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
                or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
                or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
            if "tag_kwd" not in cv:
                cv["tag_kwd"] = []
            if "好学历" not in cv["tag_kwd"]:
                cv["tag_kwd"].append("好学历")

    if cv.get("major_kwd"):
        cv["major_tks"] = huqie.qie(" ".join(maj))
    if cv.get("school_name_kwd"):
        cv["school_name_tks"] = huqie.qie(" ".join(sch))
    if cv.get("first_school_name_kwd"):
        cv["first_school_name_tks"] = huqie.qie(" ".join(fsch))
    if cv.get("first_major_kwd"):
        cv["first_major_tks"] = huqie.qie(" ".join(fmaj))

    return cv
175
+
176
+
177
def forProj(cv):
    """Tokenise project names and descriptions from ``cv["project_obj"]``.

    Projects are visited in descending ``updated_at`` order. The first
    project name becomes ``project_name_tks``; all description-like texts
    (``describe``, ``responsibilities``, ``achivement``) are joined into
    ``pro_desc_ltks`` and the first one alone into ``project_desc_ltks``,
    with HTML-like tags removed. Returns *cv*.

    Entries that are not dicts are skipped: the sort key already tolerates
    them, but the loop body would otherwise fail on ``.get``.
    """
    if not cv.get("project_obj"):
        return cv

    pro_nms, desc = [], []
    for n in sorted(cv.get("project_obj", []),
                    key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "",
                    reverse=True):
        if not isinstance(n, dict):
            continue
        if n.get("name"):
            pro_nms.append(n["name"])
        if n.get("describe"):
            desc.append(str(n["describe"]))
        if n.get("responsibilities"):
            desc.append(str(n["responsibilities"]))
        # "achivement" (sic) is the key name used by the incoming data.
        if n.get("achivement"):
            desc.append(str(n["achivement"]))

    if pro_nms:
        # cv["pro_nms_tks"] = huqie.qie(" ".join(pro_nms))
        cv["project_name_tks"] = huqie.qie(pro_nms[0])
    if desc:
        cv["pro_desc_ltks"] = huqie.qie(rmHtmlTag(" ".join(desc)))
        cv["project_desc_ltks"] = huqie.qie(rmHtmlTag(desc[0]))

    return cv
197
+
198
+
199
def json_loads(line):
    """Decode *line* with demjson after quoting bare ``True``/``False`` values."""
    # A "True"/"False" that follows a colon is wrapped in single quotes first.
    quoted = re.sub(r": *(True|False)", r": '\1'", line)
    return demjson.decode(quoted)
201
+
202
+
203
def forWork(cv):
    """Derive work-history features from ``cv["work_obj"]`` and store them in *cv*.

    When there is no work data, ``cv["integerity_flt"]`` is multiplied by 0.7
    and *cv* is returned. Otherwise the jobs are walked in descending
    ``start_time`` order (index 0 is the first job in that order) to collect
    per-field texts, company tags, job durations in days and company scales.
    Tokenised, keyword and numeric fields are then written into *cv*, which
    is returned.
    """
    if not cv.get("work_obj"):
        cv["integerity_flt"] *= 0.7
        return cv

    flds = ["position_name", "corporation_name", "corporation_id", "responsibilities",
            "industry_name", "subordinates_count"]
    duas = []
    scales = []
    fea = {c: [] for c in flds}
    latest_job_tm = ""
    goodcorp = False
    goodcorp_ = False
    work_st_tm = ""
    corp_tags = []
    for i, n in enumerate(
            sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "",
                   reverse=True)):
        # Entries may arrive as serialized strings; undecodable ones are skipped.
        if type(n) == type(""):
            try:
                n = json_loads(n)
            except Exception:
                continue

        # Track the earliest start time across all jobs.
        if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm):
            work_st_tm = n["start_time"]
        for c in flds:
            if not n.get(c) or str(n[c]) == '0':
                fea[c].append("")
                continue
            if c == "corporation_name":
                n[c] = corporations.corpNorm(n[c], False)
                if corporations.is_good(n[c]):
                    if i == 0:
                        goodcorp = True
                    else:
                        goodcorp_ = True
                ct = corporations.corp_tag(n[c])
                if i == 0:
                    corp_tags.extend(ct)
                elif ct and ct[0] != "软外":
                    corp_tags.extend([f"{t}(曾)" for t in ct])

            fea[c].append(rmHtmlTag(str(n[c]).lower()))

        y, m, d = getYMD(n.get("start_time"))
        if not y or not m:
            continue
        st = "%s-%02d-%02d" % (y, int(m), int(d))
        # NOTE(review): overwritten on every iteration, so it ends up holding
        # the start date of the LAST job processed -- confirm intent.
        latest_job_tm = st

        y, m, d = getYMD(n.get("end_time"))
        if (not y or not m) and i > 0:
            continue
        # NOTE(review): the year 2022 is hard-coded as the cut-off above which
        # an end date is replaced by "updated_at" -- confirm.
        if not y or not m or int(y) > 2022:
            y, m, d = getYMD(str(n.get("updated_at", "")))
        if not y or not m:
            continue
        ed = "%s-%02d-%02d" % (y, int(m), int(d))

        try:
            duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
        except Exception:
            print("kkkkkkkkkkkkkkkkkkkk", n.get("start_time"), n.get("end_time"))

        if n.get("scale"):
            r = re.search(r"^([0-9]+)", str(n["scale"]))
            if r:
                scales.append(int(r.group(1)))

    if goodcorp:
        if "tag_kwd" not in cv:
            cv["tag_kwd"] = []
        cv["tag_kwd"].append("好公司")
    if goodcorp_:
        if "tag_kwd" not in cv:
            cv["tag_kwd"] = []
        cv["tag_kwd"].append("好公司(曾)")

    if corp_tags:
        if "tag_kwd" not in cv:
            cv["tag_kwd"] = []
        cv["tag_kwd"].extend(corp_tags)
        cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]

    if latest_job_tm:
        cv["latest_job_dt"] = latest_job_tm
    if fea["corporation_id"]:
        cv["corporation_id"] = fea["corporation_id"]

    # For each field, the first entry gets its own tokens and the remaining
    # entries are tokenised together.
    if fea["position_name"]:
        cv["position_name_tks"] = huqie.qie(fea["position_name"][0])
        cv["position_name_sm_tks"] = huqie.qieqie(cv["position_name_tks"])
        cv["pos_nm_tks"] = huqie.qie(" ".join(fea["position_name"][1:]))

    if fea["industry_name"]:
        cv["industry_name_tks"] = huqie.qie(fea["industry_name"][0])
        cv["industry_name_sm_tks"] = huqie.qieqie(cv["industry_name_tks"])
        cv["indu_nm_tks"] = huqie.qie(" ".join(fea["industry_name"][1:]))

    if fea["corporation_name"]:
        cv["corporation_name_kwd"] = fea["corporation_name"][0]
        cv["corp_nm_kwd"] = fea["corporation_name"]
        cv["corporation_name_tks"] = huqie.qie(fea["corporation_name"][0])
        cv["corporation_name_sm_tks"] = huqie.qieqie(cv["corporation_name_tks"])
        cv["corp_nm_tks"] = huqie.qie(" ".join(fea["corporation_name"][1:]))

    if fea["responsibilities"]:
        cv["responsibilities_ltks"] = huqie.qie(fea["responsibilities"][0])
        cv["resp_ltks"] = huqie.qie(" ".join(fea["responsibilities"][1:]))

    # Keep only purely numeric subordinate counts. The previous pattern
    # "[^0-9]+$" selected the NON-numeric entries, which dropped every real
    # count and made int() fail on the rest.
    if fea["subordinates_count"]:
        fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
                                     re.match(r"[0-9]+$", str(i))]
    if fea["subordinates_count"]:
        cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])

    if type(cv.get("corporation_id")) == type(1):
        cv["corporation_id"] = [str(cv["corporation_id"])]
    if not cv.get("corporation_id"):
        cv["corporation_id"] = []
    for i in cv.get("corporation_id", []):
        cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)

    if work_st_tm:
        try:
            if re.match(r"[0-9]{9,}", work_st_tm):
                work_st_tm = turnTm2Dt(work_st_tm)
            y, m, d = getYMD(work_st_tm)
            # Years since the earliest job start, capped by any value already present.
            cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
        except Exception as e:
            print("EXCEPTION: ", e, work_st_tm, cv.get("work_exp_flt"))

    cv["job_num_int"] = 0
    if duas:
        cv["dua_flt"] = np.mean(duas)
        cv["cur_dua_int"] = duas[0]
        cv["job_num_int"] = len(duas)
    if scales:
        cv["scale_flt"] = np.max(scales)
    return cv
327
+
328
+
329
def turnTm2Dt(b):
    """Normalise a date-like value to a stripped string.

    Falsy input yields ``None``. Text that begins with ten or more digits is
    read as epoch seconds (first ten digits only) and formatted in local time
    as "%Y-%m-%d %H:%M:%S". Any other text is returned stripped.
    """
    if not b:
        return
    text = str(b).strip()
    if not re.match(r"[0-9]{10,}", text):
        return text
    epoch_seconds = int(text[:10])
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(epoch_seconds))
334
+
335
+
336
def getYMD(b):
    """Extract (year, month, day) from a date-like value.

    Returns a tuple ``(y, m, d)``: ``y`` is an int when the normalised text
    starts with four digits, otherwise ""; ``m`` and ``d`` are strings.
    Falsy input returns ``("", "", "01")``. For any other input a missing or
    out-of-range month or day falls back to "1", so ``m`` is never empty then.
    """
    y, m, d = "", "", "01"
    if not b: return (y, m, d)
    # Epoch-like digit strings are converted to "YYYY-MM-DD HH:MM:SS" first.
    b = turnTm2Dt(b)
    if re.match(r"[0-9]{4}", b): y = int(b[:4])
    # Month: one or two digits after four digits and an optional separator.
    r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
    if r: m = r.group(1)
    # Day: digits after the month part.
    # NOTE(review): the pattern can backtrack into the month digits, e.g.
    # "2020-05" gives day "5" -- confirm whether that is acceptable.
    r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
    if r: d = r.group(1)
    if not d or int(d) == 0 or int(d) > 31: d = "1"
    if not m or int(m) > 12 or int(m) < 1: m = "1"
    return (y, m, d)
348
+
349
+
350
def birth(cv):
    """Add birth-date fields to *cv* from ``cv["birth"]`` and return *cv*.

    A missing birth value multiplies ``cv["integerity_flt"]`` by 0.9.
    Otherwise ``birth_dt`` (YYYY-MM-DD), ``birthday_kwd`` (MMDD) and
    ``age_int`` (current year minus birth year) are set, provided a year and
    a month could be extracted.
    """
    raw = cv.get("birth")
    if not raw:
        cv["integerity_flt"] *= 0.9
        return cv

    year, month, day = getYMD(cv["birth"])
    if not (month and year):
        return cv

    cv["birth_dt"] = "%s-%02d-%02d" % (year, int(month), int(day))
    cv["birthday_kwd"] = "%02d%02d" % (int(month), int(day))

    this_year = datetime.datetime.now().year
    cv["age_int"] = this_year - int(year)
    return cv
362
+
363
+
364
def parse(cv):
    """Normalise one raw resume record into flat, suffix-typed fields.

    The record is mutated in place through the stages below and finally
    passed through ``dealWithInt64``:

    1. ``'\\N'`` values become ``''``; ``None`` values and empty lists/strings
       are dropped.
    2. A completeness score ``integerity_flt`` is computed as the share of the
       listed token/keyword/number fields that hold a value.
    3. ``corporation_type``, ``political_status`` and ``phone`` are cleaned
       with regular expressions.
    4. ``*_obj`` JSON fields are expanded to ``*_kwd`` / ``*_tks``; text
       fields get ``*_tks`` (and ``*_sm_tks``), keyword fields get ``*_kwd``,
       numeric fields get ``*_int``.
    5. Name, pinyin, phone, ``updated_at``, ``responsibilities`` and yes/no
       flags are derived.
    6. ``forEdu``, ``forProj``, ``forWork`` and ``birth`` enrich the record.
    7. Work experience is estimated, and every key that does not end in one
       of the known type suffixes is removed.

    Args:
        cv: mutable mapping describing a resume.  It must contain
            ``tob_resume_id`` (a ``KeyError`` is raised otherwise); that value
            is stringified and copied to ``id``.

    Returns:
        The result of ``dealWithInt64(cv)``.
    """
    # '\N' is treated as "no value".
    for k in cv.keys():
        if cv[k] == '\\N':
            cv[k] = ''

    tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
               "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
               "position_name", "school_name", "self_remark", "title_name"]
    small_tks_fld = ["corporation_name", "expect_position_name", "position_name", "school_name", "title_name"]
    kwd_fld = ["address", "city", "corporation_type", "degree", "discipline_name", "expect_city_names", "email",
               "expect_industry_name", "expect_position_name", "expect_type", "gender", "industry_name",
               "industry_names", "political_status", "position_name", "scale", "school_name", "phone", "tel"]
    num_fld = ["annual_salary", "annual_salary_from", "annual_salary_to", "expect_annual_salary", "expect_salary_from",
               "expect_salary_to", "salary_month"]

    # (flag field, tag emitted for '是', tag emitted for '否')
    is_fld = [
        ("is_fertility", "已育", "未育"),
        ("is_house", "有房", "没房"),
        ("is_management_experience", "有管理经验", "无管理经验"),
        ("is_marital", "已婚", "未婚"),
        ("is_oversea", "有海外经验", "无海外经验")
    ]

    # Drop keys that carry no information.
    rmkeys = []
    for k in cv.keys():
        if cv[k] is None:
            rmkeys.append(k)
        if isinstance(cv[k], (list, str)) and len(cv[k]) == 0:
            rmkeys.append(k)
    for k in rmkeys:
        del cv[k]

    integerity = 0.
    flds_num = 0.

    def hasValues(flds):
        # Accumulate how many of ``flds`` are populated ('0' and '[]' count as empty).
        nonlocal integerity, flds_num
        flds_num += len(flds)
        for f in flds:
            v = str(cv.get(f, ""))
            if len(v) > 0 and v != '0' and v != '[]':
                integerity += 1

    hasValues(tks_fld)
    hasValues(small_tks_fld)
    hasValues(kwd_fld)
    hasValues(num_fld)
    cv["integerity_flt"] = integerity / flds_num

    if cv.get("corporation_type"):
        # Rules are applied in order; each one rewrites the running value.
        for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),
                     (r"[//.· <\((]+.*", ""),
                     (r".*(合资|民企|股份制|中外|私营|个体|Private|创业|Owned|投资).*", "民营"),
                     (r".*(机关|事业).*", "机关"),
                     (r".*(非盈利|Non-profit).*", "非盈利"),
                     (r".*(外企|外商|欧美|foreign|Institution|Australia|港资).*", "外企"),
                     (r".*国有.*", "国企"),
                     (r"[ ()\(\)人/·0-9-]+", ""),
                     (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
            # count/flags are passed by keyword: the positional form is easy to
            # misread and is deprecated in recent Python versions.
            cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], count=1000, flags=re.IGNORECASE)
        if len(cv["corporation_type"]) < 2:
            del cv["corporation_type"]

    if cv.get("political_status"):
        for p, r in [
                (r".*党员.*", "党员"),
                (r".*(无党派|公民).*", "群众"),
                (r".*团员.*", "团员")]:
            cv["political_status"] = re.sub(p, r, cv["political_status"])
        if not re.search(r"[党团群]", cv["political_status"]):
            del cv["political_status"]

    # Keep digits only and strip a leading (0)86 country code in front of 11 digits.
    if cv.get("phone"):
        cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))

    # Iterate over a snapshot because new keys are added inside the loop.
    keys = list(cv.keys())
    for k in keys:
        # deal with json objects
        if k.find("_obj") > 0:
            try:
                cv[k] = json_loads(cv[k])
                cv[k] = [a for _, a in cv[k].items()]
                nms = []
                for n in cv[k]:
                    if type(n) != type({}) or "name" not in n or not n.get("name"):
                        continue
                    n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
                    if not n["name"]:
                        continue
                    nms.append(n["name"])
                if nms:
                    t = k[:-4]  # strip the trailing "_obj"
                    cv[f"{t}_kwd"] = nms
                    cv[f"{t}_tks"] = huqie.qie(" ".join(nms))
            except Exception as e:
                # Best effort: a malformed object field is logged and emptied.
                print("【EXCEPTION】:", str(traceback.format_exc()), cv[k])
                cv[k] = []

        # tokenize fields
        if k in tks_fld:
            cv[f"{k}_tks"] = huqie.qie(cv[k])
            if k in small_tks_fld:
                cv[f"{k}_sm_tks"] = huqie.qie(cv[f"{k}_tks"])

        # keyword fields
        if k in kwd_fld:
            cv[f"{k}_kwd"] = [n.lower()
                              for n in re.split(r"[\t,,;;. ]",
                                                re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
                                                ) if n]

        if k in num_fld and cv.get(k):
            cv[f"{k}_int"] = cv[k]

    cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
    # for name field
    if cv.get("name"):
        nm = re.sub(r"[\n——\-\((\+].*", "", cv["name"].strip())
        # Collapse runs of spaces, tabs and ideographic spaces into one space.
        nm = re.sub(r"[ \t\u3000]+", " ", nm)
        if re.match(r"[a-zA-Z ]+$", nm):
            # A purely Latin name is only accepted when it has at least two parts.
            if len(nm.split(" ")) > 1:
                cv["name"] = nm
            else:
                nm = ""
        elif nm and (surname.isit(nm[0]) or surname.isit(nm[:2])):
            nm = re.sub(r"[a-zA-Z]+.*", "", nm[:5])
        else:
            nm = ""
        cv["name"] = nm.strip()
        name = cv["name"]

        # name pingyin and its prefix
        cv["name_py_tks"] = " ".join(PY.get_pinyins(nm[:20], '')) + " " + " ".join(PY.get_pinyins(nm[:20], ' '))
        cv["name_py_pref0_tks"] = ""
        cv["name_py_pref_tks"] = ""
        for py in PY.get_pinyins(nm[:20], ''):
            for i in range(2, len(py) + 1):
                cv["name_py_pref_tks"] += " " + py[:i]
        for py in PY.get_pinyins(nm[:20], ' '):
            py = py.split(" ")
            for i in range(1, len(py) + 1):
                cv["name_py_pref0_tks"] += " " + "".join(py[:i])

        cv["name_kwd"] = name
        cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
        cv["name_tks"] = (
            huqie.qie(name) + " " + (" ".join(list(name)) if not re.match(r"[a-zA-Z ]+$", name) else "")
        ) if name else ""
    else:
        # A resume without a name is considered half as complete.
        cv["integerity_flt"] /= 2.

    if cv.get("phone"):
        r = re.search(r"(1[3456789][0-9]{9})", cv["phone"])
        if not r:
            cv["phone"] = ""
        else:
            cv["phone"] = r.group(1)

    # deal with date fields
    if cv.get("updated_at") and isinstance(cv["updated_at"], datetime.datetime):
        cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
    else:
        y, m, d = getYMD(str(cv.get("updated_at", "")))
        if not y:
            y = "2012"
        if not m:
            m = "01"
        if not d:
            d = "01"
        cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))

    # long text tokenize
    if cv.get("responsibilities"):
        cv["responsibilities_ltks"] = huqie.qie(rmHtmlTag(cv["responsibilities"]))

    # for yes or no field
    fea = []
    for f, y, n in is_fld:
        if f not in cv:
            continue
        if cv[f] == '是':
            fea.append(y)
        if cv[f] == '否':
            fea.append(n)

    if fea:
        cv["tag_kwd"] = fea

    cv = forEdu(cv)
    cv = forProj(cv)
    cv = forWork(cv)
    cv = birth(cv)

    # Combine every corporation tag with all school ranks and the highest degree.
    cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
    for i in range(len(cv["corp_proj_sch_deg_kwd"])):
        for j in cv.get("sch_rank_kwd", []):
            cv["corp_proj_sch_deg_kwd"][i] += "+" + j
    for i in range(len(cv["corp_proj_sch_deg_kwd"])):
        if cv.get("highest_degree_kwd"):
            cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]

    try:
        if not cv.get("work_exp_flt") and cv.get("work_start_time"):
            if re.match(r"[0-9]{9,}", str(cv["work_start_time"])):
                cv["work_start_dt"] = turnTm2Dt(cv["work_start_time"])
                # NOTE(review): the value is divided by 1000 here, i.e. read as
                # milliseconds - confirm the unit of work_start_time.
                cv["work_exp_flt"] = (time.time() - int(int(cv["work_start_time"]) / 1000)) / 3600. / 24. / 365.
            elif re.match(r"[0-9]{4}[^0-9]", str(cv["work_start_time"])):
                y, m, d = getYMD(str(cv["work_start_time"]))
                cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
                cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
    except Exception as e:
        print("【EXCEPTION】", e, "==>", cv.get("work_start_time"))
    if "work_exp_flt" not in cv and cv.get("work_experience", 0):
        cv["work_exp_flt"] = int(cv["work_experience"]) / 12.

    # Keep only keys carrying one of the known type suffixes.
    keys = list(cv.keys())
    for k in keys:
        if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k):
            del cv[k]
    # De-duplicate list-valued keyword/id fields and trim a trailing "市".
    for k in cv.keys():
        if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list):
            continue
        cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
    keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
    for k in keys:
        if cv[k] <= 0:
            del cv[k]

    cv["tob_resume_id"] = str(cv["tob_resume_id"])
    cv["id"] = cv["tob_resume_id"]

    return dealWithInt64(cv)
568
+
569
+
570
def dealWithInt64(d):
    """Recursively replace NumPy integer scalars with built-in ``int``.

    Dictionaries are rewritten in place (same object returned), lists are
    rebuilt as new lists, a bare ``np.integer`` becomes ``int`` and any other
    value is returned untouched.
    """
    if isinstance(d, dict):
        # Re-assigning existing keys does not resize the dict, so iterating it is safe.
        for key in d:
            d[key] = dealWithInt64(d[key])
        return d

    if isinstance(d, list):
        return [dealWithInt64(item) for item in d]

    if isinstance(d, np.integer):
        return int(d)
    return d
580
+
deepdoc/vision/ocr.py CHANGED
@@ -64,7 +64,11 @@ def load_model(model_dir, nm):
64
  if not os.path.exists(model_file_path):
65
  raise ValueError("not find model file path {}".format(
66
  model_file_path))
67
- sess = ort.InferenceSession(model_file_path)
 
 
 
 
68
  return sess, sess.get_inputs()[0]
69
 
70
 
 
64
  if not os.path.exists(model_file_path):
65
  raise ValueError("not find model file path {}".format(
66
  model_file_path))
67
+
68
+ if ort.get_device() == "GPU":
69
+ sess = ort.InferenceSession(model_file_path, providers=['CUDAExecutionProvider'])
70
+ else:
71
+ sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
72
  return sess, sess.get_inputs()[0]
73
 
74
 
rag/app/book.py CHANGED
@@ -12,7 +12,7 @@
12
  #
13
  import copy
14
  import re
15
- from deepdoc.parser import bullets_category, is_english, tokenize, remove_contents_table, \
16
  hierarchical_merge, make_colon_as_title, naive_merge, random_choices
17
  from rag.nlp import huqie
18
  from deepdoc.parser import PdfParser, DocxParser
@@ -47,7 +47,7 @@ class Pdf(PdfParser):
47
  return [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno","")) for b in self.boxes], tbls
48
 
49
 
50
- def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
51
  """
52
  Supported file formats are docx, pdf, txt.
53
  Since a book is long and not all the parts are useful, if it's a PDF,
@@ -94,7 +94,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
94
 
95
  sections = [t for t, _ in sections]
96
  # is it English
97
- eng = is_english(random_choices(sections, k=218))
98
 
99
  res = []
100
  # add tables
 
12
  #
13
  import copy
14
  import re
15
+ from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, \
16
  hierarchical_merge, make_colon_as_title, naive_merge, random_choices
17
  from rag.nlp import huqie
18
  from deepdoc.parser import PdfParser, DocxParser
 
47
  return [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno","")) for b in self.boxes], tbls
48
 
49
 
50
+ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
51
  """
52
  Supported file formats are docx, pdf, txt.
53
  Since a book is long and not all the parts are useful, if it's a PDF,
 
94
 
95
  sections = [t for t, _ in sections]
96
  # is it English
97
+ eng = lang.lower() == "english"#is_english(random_choices(sections, k=218))
98
 
99
  res = []
100
  # add tables
rag/app/laws.py CHANGED
@@ -14,7 +14,7 @@ import copy
14
  import re
15
  from io import BytesIO
16
  from docx import Document
17
- from deepdoc.parser import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \
18
  make_colon_as_title
19
  from rag.nlp import huqie
20
  from deepdoc.parser import PdfParser, DocxParser
@@ -68,7 +68,7 @@ class Pdf(PdfParser):
68
  return [b["text"] + self._line_tag(b, zoomin) for b in self.boxes]
69
 
70
 
71
- def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
72
  """
73
  Supported file formats are docx, pdf, txt.
74
  """
@@ -106,7 +106,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
106
  else: raise NotImplementedError("file type not supported yet(docx, pdf, txt supported)")
107
 
108
  # is it English
109
- eng = is_english(sections)
110
  # Remove 'Contents' part
111
  remove_contents_table(sections, eng)
112
 
 
14
  import re
15
  from io import BytesIO
16
  from docx import Document
17
+ from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \
18
  make_colon_as_title
19
  from rag.nlp import huqie
20
  from deepdoc.parser import PdfParser, DocxParser
 
68
  return [b["text"] + self._line_tag(b, zoomin) for b in self.boxes]
69
 
70
 
71
+ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
72
  """
73
  Supported file formats are docx, pdf, txt.
74
  """
 
106
  else: raise NotImplementedError("file type not supported yet(docx, pdf, txt supported)")
107
 
108
  # is it English
109
+ eng = lang.lower() == "english"#is_english(sections)
110
  # Remove 'Contents' part
111
  remove_contents_table(sections, eng)
112
 
rag/app/manual.py CHANGED
@@ -1,7 +1,6 @@
1
  import copy
2
  import re
3
- from deepdoc.parser import tokenize
4
- from rag.nlp import huqie
5
  from deepdoc.parser import PdfParser
6
  from rag.utils import num_tokens_from_string
7
 
@@ -57,7 +56,7 @@ class Pdf(PdfParser):
57
  return [b["text"] + self._line_tag(b, zoomin) for b in self.boxes], tbls
58
 
59
 
60
- def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
61
  """
62
  Only pdf is supported.
63
  """
@@ -74,7 +73,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
74
  doc["title_tks"] = huqie.qie(re.sub(r"\.[a-zA-Z]+$", "", doc["docnm_kwd"]))
75
  doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
76
  # is it English
77
- eng = pdf_parser.is_english
78
 
79
  res = []
80
  # add tables
 
1
  import copy
2
  import re
3
+ from rag.nlp import huqie, tokenize
 
4
  from deepdoc.parser import PdfParser
5
  from rag.utils import num_tokens_from_string
6
 
 
56
  return [b["text"] + self._line_tag(b, zoomin) for b in self.boxes], tbls
57
 
58
 
59
+ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
60
  """
61
  Only pdf is supported.
62
  """
 
73
  doc["title_tks"] = huqie.qie(re.sub(r"\.[a-zA-Z]+$", "", doc["docnm_kwd"]))
74
  doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
75
  # is it English
76
+ eng = lang.lower() == "english"#pdf_parser.is_english
77
 
78
  res = []
79
  # add tables
rag/app/naive.py CHANGED
@@ -13,8 +13,7 @@
13
  import copy
14
  import re
15
  from rag.app import laws
16
- from deepdoc.parser import is_english, tokenize, naive_merge
17
- from rag.nlp import huqie
18
  from deepdoc.parser import PdfParser
19
  from rag.settings import cron_logger
20
 
@@ -38,7 +37,7 @@ class Pdf(PdfParser):
38
  return [(b["text"], self._line_tag(b, zoomin)) for b in self.boxes]
39
 
40
 
41
- def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
42
  """
43
  Supported file formats are docx, pdf, txt.
44
  This method apply the naive ways to chunk files.
@@ -80,7 +79,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
80
 
81
  parser_config = kwargs.get("parser_config", {"chunk_token_num": 128, "delimiter": "\n!?。;!?"})
82
  cks = naive_merge(sections, parser_config["chunk_token_num"], parser_config["delimiter"])
83
- eng = is_english(cks)
84
  res = []
85
  # wrap up to es documents
86
  for ck in cks:
 
13
  import copy
14
  import re
15
  from rag.app import laws
16
+ from rag.nlp import huqie, is_english, tokenize, naive_merge
 
17
  from deepdoc.parser import PdfParser
18
  from rag.settings import cron_logger
19
 
 
37
  return [(b["text"], self._line_tag(b, zoomin)) for b in self.boxes]
38
 
39
 
40
+ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
41
  """
42
  Supported file formats are docx, pdf, txt.
43
  This method apply the naive ways to chunk files.
 
79
 
80
  parser_config = kwargs.get("parser_config", {"chunk_token_num": 128, "delimiter": "\n!?。;!?"})
81
  cks = naive_merge(sections, parser_config["chunk_token_num"], parser_config["delimiter"])
82
+ eng = lang.lower() == "english"#is_english(cks)
83
  res = []
84
  # wrap up to es documents
85
  for ck in cks:
rag/app/paper.py CHANGED
@@ -15,8 +15,7 @@ import re
15
  from collections import Counter
16
 
17
  from api.db import ParserType
18
- from deepdoc.parser import tokenize
19
- from rag.nlp import huqie
20
  from deepdoc.parser import PdfParser
21
  import numpy as np
22
  from rag.utils import num_tokens_from_string
@@ -140,7 +139,7 @@ class Pdf(PdfParser):
140
  }
141
 
142
 
143
- def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
144
  """
145
  Only pdf is supported.
146
  The abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.
@@ -156,7 +155,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
156
  doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
157
  doc["authors_sm_tks"] = huqie.qieqie(doc["authors_tks"])
158
  # is it English
159
- eng = pdf_parser.is_english
160
  print("It's English.....", eng)
161
 
162
  res = []
 
15
  from collections import Counter
16
 
17
  from api.db import ParserType
18
+ from rag.nlp import huqie, tokenize
 
19
  from deepdoc.parser import PdfParser
20
  import numpy as np
21
  from rag.utils import num_tokens_from_string
 
139
  }
140
 
141
 
142
+ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
143
  """
144
  Only pdf is supported.
145
  The abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.
 
155
  doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
156
  doc["authors_sm_tks"] = huqie.qieqie(doc["authors_tks"])
157
  # is it English
158
+ eng = lang.lower() == "english"#pdf_parser.is_english
159
  print("It's English.....", eng)
160
 
161
  res = []
rag/app/picture.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+ #
13
+ import io
14
+
15
+ import numpy as np
16
+ from PIL import Image
17
+
18
+ from api.db import LLMType
19
+ from api.db.services.llm_service import LLMBundle
20
+ from rag.nlp import tokenize
21
+ from deepdoc.vision import OCR
22
+
23
# Module-level OCR engine: built once when this module is imported and reused by chunk().
ocr = OCR()
24
+
25
+
26
def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
    """Turn a single picture into at most one indexable chunk.

    The image is OCR-ed first.  If the recognised text is long (more than 32
    space-separated words for English, or more than 32 characters), that text
    alone is tokenized and returned.  Otherwise the tenant's IMAGE2TEXT model
    is asked to describe the picture and its answer is appended to the OCR
    text before tokenizing.

    Progress and failures are reported through ``callback``; note that it is
    invoked unconditionally, so a callable must be supplied.

    Returns:
        ``[doc]`` on success, ``[]`` when the vision model cannot be created
        or its call fails.
    """
    try:
        cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang)
    except Exception as e:
        callback(prog=-1, msg=str(e))
        return []

    img = Image.open(io.BytesIO(binary))
    doc = {"docnm_kwd": filename, "image": img}

    boxes = ocr(np.array(img))
    txt = "\n".join([t[0] for _, t in boxes if t[0]])
    eng = lang.lower() == "english"
    callback(0.4, "Finish OCR: (%s ...)" % txt[:12])

    many_words = eng and len(txt.split(" ")) > 32
    if many_words or len(txt) > 32:
        # Enough text was recognised; skip the (slower) vision model.
        tokenize(doc, txt, eng)
        callback(0.8, "OCR results is too long to use CV LLM.")
        return [doc]

    try:
        callback(0.4, "Use CV LLM to describe the picture.")
        ans = cv_mdl.describe(binary)
        callback(0.8, "CV LLM respoond: %s ..." % ans[:32])
        txt += "\n" + ans
        tokenize(doc, txt, eng)
        return [doc]
    except Exception as e:
        callback(prog=-1, msg=str(e))
        return []
rag/app/presentation.py CHANGED
@@ -13,46 +13,14 @@
13
  import copy
14
  import re
15
  from io import BytesIO
16
- from pptx import Presentation
17
- from deepdoc.parser import tokenize, is_english
18
  from rag.nlp import huqie
19
- from deepdoc.parser import PdfParser
20
 
21
 
22
- class Ppt(object):
23
- def __init__(self):
24
- super().__init__()
25
-
26
- def __extract(self, shape):
27
- if shape.shape_type == 19:
28
- tb = shape.table
29
- rows = []
30
- for i in range(1, len(tb.rows)):
31
- rows.append("; ".join([tb.cell(0, j).text + ": " + tb.cell(i, j).text for j in range(len(tb.columns)) if tb.cell(i, j)]))
32
- return "\n".join(rows)
33
-
34
- if shape.has_text_frame:
35
- return shape.text_frame.text
36
-
37
- if shape.shape_type == 6:
38
- texts = []
39
- for p in shape.shapes:
40
- t = self.__extract(p)
41
- if t: texts.append(t)
42
- return "\n".join(texts)
43
-
44
  def __call__(self, fnm, from_page, to_page, callback=None):
45
- ppt = Presentation(fnm) if isinstance(
46
- fnm, str) else Presentation(
47
- BytesIO(fnm))
48
- txts = []
49
- self.total_page = len(ppt.slides)
50
- for i, slide in enumerate(ppt.slides[from_page: to_page]):
51
- texts = []
52
- for shape in slide.shapes:
53
- txt = self.__extract(shape)
54
- if txt: texts.append(txt)
55
- txts.append("\n".join(texts))
56
 
57
  callback(0.5, "Text extraction finished.")
58
  import aspose.slides as slides
 
13
  import copy
14
  import re
15
  from io import BytesIO
16
+ from rag.nlp import tokenize, is_english
 
17
  from rag.nlp import huqie
18
+ from deepdoc.parser import PdfParser, PptParser
19
 
20
 
21
+ class Ppt(PptParser):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def __call__(self, fnm, from_page, to_page, callback=None):
23
+ txts = super.__call__(fnm, from_page, to_page)
 
 
 
 
 
 
 
 
 
 
24
 
25
  callback(0.5, "Text extraction finished.")
26
  import aspose.slides as slides
rag/app/qa.py CHANGED
@@ -14,7 +14,7 @@ import re
14
  from io import BytesIO
15
  from nltk import word_tokenize
16
  from openpyxl import load_workbook
17
- from deepdoc.parser import is_english, random_choices
18
  from rag.nlp import huqie, stemmer
19
  from deepdoc.parser import ExcelParser
20
 
@@ -81,7 +81,7 @@ def beAdoc(d, q, a, eng):
81
  return d
82
 
83
 
84
- def chunk(filename, binary=None, callback=None, **kwargs):
85
  """
86
  Excel and csv(txt) format files are supported.
87
  If the file is in excel format, there should be 2 column question and answer without header.
@@ -113,7 +113,7 @@ def chunk(filename, binary=None, callback=None, **kwargs):
113
  break
114
  txt += l
115
  lines = txt.split("\n")
116
- eng = is_english([rmPrefix(l) for l in lines[:100]])
117
  fails = []
118
  for i, line in enumerate(lines):
119
  arr = [l for l in line.split("\t") if len(l) > 1]
 
14
  from io import BytesIO
15
  from nltk import word_tokenize
16
  from openpyxl import load_workbook
17
+ from rag.nlp import is_english, random_choices
18
  from rag.nlp import huqie, stemmer
19
  from deepdoc.parser import ExcelParser
20
 
 
81
  return d
82
 
83
 
84
+ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
85
  """
86
  Excel and csv(txt) format files are supported.
87
  If the file is in excel format, there should be 2 column question and answer without header.
 
113
  break
114
  txt += l
115
  lines = txt.split("\n")
116
+ eng = lang.lower() == "english"#is_english([rmPrefix(l) for l in lines[:100]])
117
  fails = []
118
  for i, line in enumerate(lines):
119
  arr = [l for l in line.split("\t") if len(l) > 1]
rag/app/table.py CHANGED
@@ -20,8 +20,7 @@ from openpyxl import load_workbook
20
  from dateutil.parser import parse as datetime_parse
21
 
22
  from api.db.services.knowledgebase_service import KnowledgebaseService
23
- from deepdoc.parser import is_english, tokenize
24
- from rag.nlp import huqie
25
  from deepdoc.parser import ExcelParser
26
 
27
 
@@ -112,7 +111,7 @@ def column_data_type(arr):
112
  return arr, ty
113
 
114
 
115
- def chunk(filename, binary=None, callback=None, **kwargs):
116
  """
117
  Excel and csv(txt) format files are supported.
118
  For csv or txt file, the delimiter between columns is TAB.
@@ -192,7 +191,7 @@ def chunk(filename, binary=None, callback=None, **kwargs):
192
  clmns_map = [(py_clmns[j] + fieds_map[clmn_tys[j]], clmns[j])
193
  for i in range(len(clmns))]
194
 
195
- eng = is_english(txts)
196
  for ii, row in df.iterrows():
197
  d = {}
198
  row_txt = []
 
20
  from dateutil.parser import parse as datetime_parse
21
 
22
  from api.db.services.knowledgebase_service import KnowledgebaseService
23
+ from rag.nlp import huqie, is_english, tokenize
 
24
  from deepdoc.parser import ExcelParser
25
 
26
 
 
111
  return arr, ty
112
 
113
 
114
+ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
115
  """
116
  Excel and csv(txt) format files are supported.
117
  For csv or txt file, the delimiter between columns is TAB.
 
191
  clmns_map = [(py_clmns[j] + fieds_map[clmn_tys[j]], clmns[j])
192
  for i in range(len(clmns))]
193
 
194
+ eng = lang.lower() == "english"#is_english(txts)
195
  for ii, row in df.iterrows():
196
  d = {}
197
  row_txt = []
rag/llm/cv_model.py CHANGED
@@ -13,12 +13,18 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
 
 
17
  from openai import OpenAI
18
  import os
19
  import base64
20
  from io import BytesIO
21
 
 
 
 
22
 
23
  class Base(ABC):
24
  def __init__(self, key, model_name):
@@ -44,25 +50,26 @@ class Base(ABC):
44
  {
45
  "role": "user",
46
  "content": [
47
- {
48
- "type": "text",
49
- "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等。",
50
- },
51
  {
52
  "type": "image_url",
53
  "image_url": {
54
  "url": f"data:image/jpeg;base64,{b64}"
55
  },
56
  },
 
 
 
 
57
  ],
58
  }
59
  ]
60
 
61
 
62
  class GptV4(Base):
63
- def __init__(self, key, model_name="gpt-4-vision-preview"):
64
  self.client = OpenAI(api_key=key)
65
  self.model_name = model_name
 
66
 
67
  def describe(self, image, max_tokens=300):
68
  b64 = self.image2base64(image)
@@ -76,18 +83,40 @@ class GptV4(Base):
76
 
77
 
78
  class QWenCV(Base):
79
- def __init__(self, key, model_name="qwen-vl-chat-v1"):
80
  import dashscope
81
  dashscope.api_key = key
82
  self.model_name = model_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def describe(self, image, max_tokens=300):
85
  from http import HTTPStatus
86
  from dashscope import MultiModalConversation
87
  response = MultiModalConversation.call(model=self.model_name,
88
- messages=self.prompt(self.image2base64(image)))
89
  if response.status_code == HTTPStatus.OK:
90
- return response.output.choices[0]['message']['content'], response.usage.output_tokens
91
  return response.message, 0
92
 
93
 
@@ -95,9 +124,10 @@ from zhipuai import ZhipuAI
95
 
96
 
97
  class Zhipu4V(Base):
98
- def __init__(self, key, model_name="glm-4v"):
99
  self.client = ZhipuAI(api_key=key)
100
  self.model_name = model_name
 
101
 
102
  def describe(self, image, max_tokens=1024):
103
  b64 = self.image2base64(image)
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import io
17
  from abc import ABC
18
+
19
+ from PIL import Image
20
  from openai import OpenAI
21
  import os
22
  import base64
23
  from io import BytesIO
24
 
25
+ from api.utils import get_uuid
26
+ from api.utils.file_utils import get_project_base_directory
27
+
28
 
29
  class Base(ABC):
30
  def __init__(self, key, model_name):
 
50
  {
51
  "role": "user",
52
  "content": [
 
 
 
 
53
  {
54
  "type": "image_url",
55
  "image_url": {
56
  "url": f"data:image/jpeg;base64,{b64}"
57
  },
58
  },
59
+ {
60
+ "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \
61
+ "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
62
+ },
63
  ],
64
  }
65
  ]
66
 
67
 
68
  class GptV4(Base):
69
+ def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese"):
70
  self.client = OpenAI(api_key=key)
71
  self.model_name = model_name
72
+ self.lang = lang
73
 
74
  def describe(self, image, max_tokens=300):
75
  b64 = self.image2base64(image)
 
83
 
84
 
85
class QWenCV(Base):
    """Image-description client backed by dashscope's multimodal conversation API."""

    def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese"):
        """Store the model name / prompt language and register ``key`` with dashscope."""
        import dashscope
        dashscope.api_key = key
        self.model_name = model_name
        self.lang = lang

    def prompt(self, binary):
        """Build the dashscope message list for the image given as raw bytes.

        The image is handed over as a local ``file://`` path, so the bytes are
        written to ``<project>/tmp/<uuid>.jpg`` first.  ``describe`` deletes
        that file once the call has finished.
        """
        tmp_dir = get_project_base_directory("tmp")
        # exist_ok avoids the check-then-create race and also creates missing parents.
        os.makedirs(tmp_dir, exist_ok=True)
        path = os.path.join(tmp_dir, "%s.jpg" % get_uuid())
        img = Image.open(io.BytesIO(binary))
        # The JPEG writer rejects modes such as RGBA, P or LA (e.g. PNG/GIF uploads);
        # convert those to RGB so saving cannot fail on the image mode.
        if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            img = img.convert("RGB")
        img.save(path)
        return [
            {
                "role": "user",
                "content": [
                    {
                        "image": f"file://{path}"
                    },
                    {
                        "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \
                        "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
                    },
                ],
            }
        ]

    def describe(self, image, max_tokens=300):
        """Ask the model to describe ``image`` (raw bytes).

        ``max_tokens`` is accepted for interface compatibility but is not
        forwarded to the API.

        Returns:
            ``(text, output_tokens)`` on HTTP 200, otherwise
            ``(error message, 0)``.
        """
        from http import HTTPStatus
        from dashscope import MultiModalConversation
        messages = self.prompt(image)
        # Path of the temp file prompt() created, recovered from its file:// URL.
        tmp_path = messages[0]["content"][0]["image"][len("file://"):]
        try:
            response = MultiModalConversation.call(model=self.model_name,
                                                   messages=messages)
        finally:
            # The temp image is only needed for the duration of the call;
            # removing it is best effort and must not mask the call's outcome.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        if response.status_code == HTTPStatus.OK:
            return response.output.choices[0]['message']['content'][0]["text"], response.usage.output_tokens
        return response.message, 0
121
 
122
 
 
124
 
125
 
126
  class Zhipu4V(Base):
127
+ def __init__(self, key, model_name="glm-4v", lang="Chinese"):
128
  self.client = ZhipuAI(api_key=key)
129
  self.model_name = model_name
130
+ self.lang = lang
131
 
132
  def describe(self, image, max_tokens=1024):
133
  b64 = self.image2base64(image)
rag/nlp/__init__.py CHANGED
@@ -5,3 +5,219 @@ retrievaler = search.Dealer(ELASTICSEARCH)
5
 
6
  from nltk.stem import PorterStemmer
7
  stemmer = PorterStemmer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  from nltk.stem import PorterStemmer
7
  stemmer = PorterStemmer()
8
+
9
+ import re
10
+ from nltk import word_tokenize
11
+ from . import huqie
12
+ from rag.utils import num_tokens_from_string
13
+ import random
14
+
15
# Candidate heading / bullet numbering schemes.  Each inner list is one scheme made of
# regular expressions that are tried with re.match against the start of a section;
# bullets_category() returns the index of the scheme that matches the most sections.
# NOTE(review): within a scheme the patterns look ordered from the outermost heading
# level to the innermost one - confirm against hierarchical_merge().
BULLET_PATTERN = [[
    # Chinese legal-style numbering: part / chapter / section / article / (item)
    r"第[零一二三四五六七八九十百0-9]+(分?编|部分)",
    r"第[零一二三四五六七八九十百0-9]+章",
    r"第[零一二三四五六七八九十百0-9]+节",
    r"第[零一二三四五六七八九十百0-9]+条",
    r"[\((][零一二三四五六七八九十百]+[\))]",
], [
    # Arabic-digit numbering: chapter / section / 1. / 1.1 / 1.1.1 / 1.1.1.1
    r"第[0-9]+章",
    r"第[0-9]+节",
    r"[0-9]{,3}[\. 、]",
    r"[0-9]{,2}\.[0-9]{,2}",
    r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
    r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
], [
    # Chinese-numeral numbering: chapter / section / 一、 / (一) / (1)
    r"第[零一二三四五六七八九十百0-9]+章",
    r"第[零一二三四五六七八九十百0-9]+节",
    r"[零一二三四五六七八九十百]+[ 、]",
    r"[\((][零一二三四五六七八九十百]+[\))]",
    r"[\((][0-9]{,2}[\))]",
], [
    # English headings: PART / Chapter (roman numerals) / Section / Article
    r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)",
    r"Chapter (I+V?|VI*|XI|IX|X)",
    r"Section [0-9]+",
    r"Article [0-9]+"
]
]
41
+
42
def random_choices(arr, k):
    """Draw up to ``k`` random items from ``arr``.

    The sample size is capped at ``len(arr)``.  Sampling is done with
    ``random.choices``, i.e. with replacement, so items may repeat.
    """
    sample_size = min(len(arr), k)
    return random.choices(arr, k=sample_size)
45
+
46
def bullets_category(sections):
    """Pick the numbering scheme in ``BULLET_PATTERN`` that fits ``sections`` best.

    A section counts once for a scheme if any of that scheme's patterns
    matches at its start.  The index of the scheme with the strictly highest
    count is returned (the earliest one wins ties); ``-1`` means no scheme
    matched any section.
    """
    hits = [
        sum(1 for sec in sections if any(re.match(p, sec) for p in group))
        for group in BULLET_PATTERN
    ]
    best, best_hits = -1, 0
    for idx, count in enumerate(hits):
        if count > best_hits:
            best, best_hits = idx, count
    return best
62
+
63
+
64
def is_english(texts):
    """Tell whether a collection of text snippets is predominantly English.

    A snippet counts as English when, after stripping, it starts with at
    least two ASCII letters.

    Args:
        texts: sized collection of strings.

    Returns:
        ``True`` if more than 80% of the snippets count as English,
        ``False`` otherwise - including for an empty collection, which
        previously raised ``ZeroDivisionError``.
    """
    if not texts:
        return False
    eng = sum(1 for t in texts if re.match(r"[a-zA-Z]{2,}", t.strip()))
    return eng / len(texts) > 0.8
72
+
73
+
74
def tokenize(d, t, eng):
    """Fill the content fields of chunk dict ``d`` from text ``t``.

    ``content_with_weight`` always receives the raw text.  For English the
    text is de-hyphenated between lowercase letters, split with
    ``word_tokenize`` and stemmed into ``content_ltks``.  Otherwise
    ``content_ltks`` comes from ``huqie.qie`` and ``content_sm_ltks`` from
    ``huqie.qieqie`` applied to it.  ``d`` is modified in place.
    """
    d["content_with_weight"] = t
    if not eng:
        d["content_ltks"] = huqie.qie(t)
        d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
        return

    # Join words that were hyphenated, e.g. at a line break.
    joined = re.sub(r"([a-z])-([a-z])", r"\1\2", t)
    stems = [stemmer.stem(w) for w in word_tokenize(joined)]
    d["content_ltks"] = " ".join(stems)
82
+
83
+
84
def remove_contents_table(sections, eng=False):
    """Remove table-of-contents style front matter from ``sections`` in place.

    Whenever a section consists only of a contents/acknowledgement heading
    (position tag after ``@@`` and all whitespace ignored, case-insensitive),
    the heading is removed together with any blank sections after it and the
    first entry that follows. The leading characters of that first entry are
    then searched for in the next 128 sections; if a section starting with
    the same prefix is found, everything before it is removed as well.

    Fixes over the previous version:
      * ``re.IGNORECASE`` was passed as the positional ``count`` argument of
        ``re.sub``, so matching was case-sensitive and at most two
        whitespace runs were stripped;
      * "table of contents" could never match once spaces were stripped;
      * the entry prefix was used as a regular expression, so prefixes such
        as "(一)" or "1.1" mis-matched; it is now compared literally;
      * a heading followed only by blank sections raised ``IndexError``.

    :param sections: list of strings or of ``(text, layout)`` tuples;
        modified in place.
    :param eng: when true the prefix is the first two space-separated words
        of the entry, otherwise its first three characters.
    :return: None.
    """
    heading = re.compile(
        r"(contents|目录|目次|table ?of ?contents|致谢|acknowledge)$",
        re.IGNORECASE)

    def get(idx):
        item = sections[idx]
        return (item if isinstance(item, str) else item[0]).strip()

    i = 0
    while i < len(sections):
        title = re.sub(r"\s+", "", get(i).split("@@")[0])
        if not heading.match(title):
            i += 1
            continue
        sections.pop(i)
        # Drop blank sections between the heading and its first entry.
        while i < len(sections) and not get(i):
            sections.pop(i)
        if i >= len(sections):
            break
        first_entry = get(i)
        prefix = " ".join(first_entry.split(" ")[:2]) if eng else first_entry[:3]
        sections.pop(i)
        if i >= len(sections) or not prefix:
            break
        # The body is assumed to start where the first entry's prefix shows
        # up again; everything before that point is the rest of the listing.
        for j in range(i, min(i + 128, len(sections))):
            if get(j).startswith(prefix):
                del sections[i:j]
                break
109
+
110
+
111
def make_colon_as_title(sections):
    """Insert a "title" entry before sections whose text ends with a colon.

    Works in place on a list of ``(text, layout)`` tuples. For each section
    whose visible text (the part before the first "@", stripped) ends with a
    colon, the text is reversed and split on sentence punctuation; when the
    length check passes, the last clause is inserted before the section as
    ``(clause, "title")``.

    :param sections: list of ``(text, layout)`` tuples, or list of strings.
    :return: ``[]`` for empty input, ``sections`` itself when it holds plain
        strings, otherwise None (the list is modified in place).
    """
    if not sections:
        return []
    if type(sections[0]) is str:
        return sections
    idx = 0
    while idx < len(sections):
        raw, _layout = sections[idx]
        idx += 1
        visible = raw.split("@")[0].strip()
        if not visible or visible[-1] not in "::":
            continue
        pieces = re.split(r"([。?!!?;;]| .)", visible[::-1])
        # NOTE(review): because the pattern has a capturing group, pieces[1]
        # is the matched delimiter (one or two characters), so this length
        # check looks like it can never pass -- confirm the intended index.
        if len(pieces) >= 2 and len(pieces[1]) >= 32:
            sections.insert(idx - 1, (pieces[0][::-1], "title"))
            # Step over the section that was shifted right by the insert.
            idx += 1
129
+
130
+
131
def hierarchical_merge(bull, sections, depth):
    """Group sections into chunks made of a section plus its heading chain.

    Every section is assigned a level: the index of the first pattern of
    ``BULLET_PATTERN[bull]`` that matches it (unless the text looks like
    body text), else one extra level for sections whose layout contains
    "title" or "head", else a final level for everything else. Starting
    from the ``depth`` last levels, each unread section opens a chunk and
    collects, for every level before it, the nearest preceding section of
    that level.

    Changes over the previous version:
      * the debug ``print`` calls that dumped every section and chunk to
        stdout are gone;
      * the "drop stale ancestor" check compared a position inside a level
        list with a section index; it now compares section indices, so a
        heading that precedes a newer higher-level heading is no longer
        kept in the chain;
      * the hand-rolled binary search (which hit ``assert False`` on an
        exact match) is replaced by ``bisect_left``.

    :param bull: index into BULLET_PATTERN, as returned by
        bullets_category(); a negative value yields ``[]``.
    :param sections: list of strings or of ``(text, layout)`` tuples.
    :param depth: how many of the last levels may open a chunk.
    :return: list of chunks, each a list of section texts ordered from the
        first-level heading down to the section that opened the chunk.
    """
    from bisect import bisect_left

    if not sections or bull < 0:
        return []
    if isinstance(sections[0], str):
        sections = [(s, "") for s in sections]

    def visible(text):
        # Text before the first "@", without surrounding whitespace.
        return text.split("@")[0].strip()

    # Drop empty / one-character sections and bare numbers.
    sections = [(t, o) for t, o in sections
                if t and len(visible(t)) > 1
                and not re.match(r"[0-9]+$", visible(t))]
    patterns = BULLET_PATTERN[bull]
    bullets_size = len(patterns)
    levels = [[] for _ in range(bullets_size + 2)]

    def not_title(txt):
        # "第…条" always counts as a heading; long or punctuated text does not.
        if re.match(r"第[零一二三四五六七八九十百0-9]+条", txt):
            return False
        if len(txt) >= 128:
            return True
        return re.search(r"[,;,。;!!]", txt)

    for i, (txt, layout) in enumerate(sections):
        for j, p in enumerate(patterns):
            if re.match(p, txt.strip()) and not not_title(txt):
                levels[j].append(i)
                break
        else:
            if re.search(r"(title|head)", layout):
                levels[bullets_size].append(i)
            else:
                levels[bullets_size + 1].append(i)
    sections = [t for t, _ in sections]

    def nearest_before(arr, target):
        # Position of the largest element smaller than target, or -1.
        return bisect_left(arr, target) - 1

    cks = []
    readed = [False] * len(sections)
    # Reverse so that the non-heading level comes first.
    levels = levels[::-1]
    for i, arr in enumerate(levels[:depth]):
        for j in arr:
            if readed[j]:
                continue
            readed[j] = True
            cks.append([j])
            if i + 1 == len(levels) - 1:
                continue
            for ii in range(i + 1, len(levels)):
                jj = nearest_before(levels[ii], j)
                if jj < 0:
                    continue
                # The heading found at this level comes after the one kept
                # so far, so the kept one belongs to an earlier branch.
                if levels[ii][jj] > cks[-1][-1]:
                    cks[-1].pop(-1)
                cks[-1].append(levels[ii][jj])
            for ii in cks[-1]:
                readed[ii] = True
    return [[sections[j] for j in ck[::-1]] for ck in cks]
193
+
194
+
195
def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
    """Concatenate sections into chunks of roughly ``chunk_token_num`` tokens.

    Each section is cut after every delimiter character and the pieces are
    appended to the current chunk; a new chunk is started once the current
    one already holds more than ``chunk_token_num`` tokens (counted with
    ``num_tokens_from_string``). The section's position tag is appended
    after each piece, except for pieces shorter than 8 tokens.

    Fix over the previous version: when a section ended with a delimiter
    the trailing check was always true, so an empty piece was added and
    could open an empty final chunk. Only a non-empty remainder is added
    now.

    :param sections: list of strings or of ``(text, position_tag)`` tuples.
    :param chunk_token_num: token count above which a chunk is closed.
    :param delimiter: characters after which a section may be cut.
    :return: list of chunk strings; ``[]`` for empty input.
    """
    if not sections:
        return []
    if isinstance(sections[0], str):
        sections = [(s, "") for s in sections]
    cks = [""]
    tk_nums = [0]

    def add_chunk(t, pos):
        tnum = num_tokens_from_string(t)
        if tnum < 8:
            pos = ""
        if tk_nums[-1] > chunk_token_num:
            cks.append(t + pos)
            tk_nums.append(tnum)
        else:
            cks[-1] += t + pos
            tk_nums[-1] += tnum

    for sec, pos in sections:
        s, e = 0, 1
        while e < len(sec):
            if sec[e] in delimiter:
                add_chunk(sec[s: e + 1], pos)
                s = e + 1
                e = s + 1
            else:
                e += 1
        # Only a real remainder is added (nothing is left when the section
        # ended with a delimiter).
        if s < len(sec):
            add_chunk(sec[s:], pos)

    return cks
223
+
rag/svr/task_executor.py CHANGED
@@ -21,6 +21,7 @@ import hashlib
21
  import copy
22
  import re
23
  import sys
 
24
  from functools import partial
25
  from timeit import default_timer as timer
26
 
@@ -36,7 +37,7 @@ from rag.nlp import search
36
  from io import BytesIO
37
  import pandas as pd
38
 
39
- from rag.app import laws, paper, presentation, manual, qa, table, book, resume
40
 
41
  from api.db import LLMType, ParserType
42
  from api.db.services.document_service import DocumentService
@@ -56,47 +57,31 @@ FACTORY = {
56
  ParserType.QA.value: qa,
57
  ParserType.TABLE.value: table,
58
  ParserType.RESUME.value: resume,
 
59
  }
60
 
61
 
62
- def set_progress(task_id, from_page=0, to_page=-1, prog=None, msg="Processing..."):
 
 
 
63
  cancel = TaskService.do_cancel(task_id)
64
  if cancel:
65
  msg += " [Canceled]"
66
  prog = -1
67
 
68
- if to_page > 0: msg = f"Page({from_page}~{to_page}): " + msg
 
69
  d = {"progress_msg": msg}
70
- if prog is not None: d["progress"] = prog
 
71
  try:
72
  TaskService.update_progress(task_id, d)
73
  except Exception as e:
74
  cron_logger.error("set_progress:({}), {}".format(task_id, str(e)))
75
 
76
- if cancel:sys.exit()
77
-
78
-
79
- """
80
- def chuck_doc(name, binary, tenant_id, cvmdl=None):
81
- suff = os.path.split(name)[-1].lower().split(".")[-1]
82
- if suff.find("pdf") >= 0:
83
- return PDF(binary)
84
- if suff.find("doc") >= 0:
85
- return DOC(binary)
86
- if re.match(r"(xlsx|xlsm|xltx|xltm)", suff):
87
- return EXC(binary)
88
- if suff.find("ppt") >= 0:
89
- return PPT(binary)
90
- if cvmdl and re.search(r"\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico)$",
91
- name.lower()):
92
- txt = cvmdl.describe(binary)
93
- field = TextChunker.Fields()
94
- field.text_chunks = [(txt, binary)]
95
- field.table_chunks = []
96
- return field
97
-
98
- return TextChunker()(binary)
99
- """
100
 
101
 
102
  def collect(comm, mod, tm):
@@ -109,29 +94,38 @@ def collect(comm, mod, tm):
109
  return tasks
110
 
111
 
112
- def build(row, cvmdl):
113
  if row["size"] > DOC_MAXIMUM_SIZE:
114
  set_progress(row["id"], prog=-1, msg="File size exceeds( <= %dMb )" %
115
  (int(DOC_MAXIMUM_SIZE / 1024 / 1024)))
116
  return []
117
 
118
- callback = partial(set_progress, row["id"], row["from_page"], row["to_page"])
 
 
 
 
119
  chunker = FACTORY[row["parser_id"].lower()]
120
  try:
121
- cron_logger.info("Chunkking {}/{}".format(row["location"], row["name"]))
122
- cks = chunker.chunk(row["name"], binary = MINIO.get(row["kb_id"], row["location"]), from_page=row["from_page"], to_page=row["to_page"],
123
- callback = callback, kb_id=row["kb_id"], parser_config=row["parser_config"])
 
 
124
  except Exception as e:
125
  if re.search("(No such file|not found)", str(e)):
126
  callback(-1, "Can not find file <%s>" % row["doc_name"])
127
  else:
128
- callback(-1, f"Internal server error: %s" % str(e).replace("'", ""))
 
 
129
 
130
- cron_logger.warn("Chunkking {}/{}: {}".format(row["location"], row["name"], str(e)))
 
131
 
132
  return
133
 
134
- callback(msg="Finished slicing files. Start to embedding the content.")
135
 
136
  docs = []
137
  doc = {
@@ -142,7 +136,8 @@ def build(row, cvmdl):
142
  d = copy.deepcopy(doc)
143
  d.update(ck)
144
  md5 = hashlib.md5()
145
- md5.update((ck["content_with_weight"] + str(d["doc_id"])).encode("utf-8"))
 
146
  d["_id"] = md5.hexdigest()
147
  d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
148
  d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
@@ -173,7 +168,8 @@ def init_kb(row):
173
 
174
 
175
  def embedding(docs, mdl, parser_config={}):
176
- tts, cnts = [rmSpace(d["title_tks"]) for d in docs if d.get("title_tks")], [d["content_with_weight"] for d in docs]
 
177
  tk_count = 0
178
  if len(tts) == len(cnts):
179
  tts, c = mdl.encode(tts)
@@ -182,7 +178,8 @@ def embedding(docs, mdl, parser_config={}):
182
  cnts, c = mdl.encode(cnts)
183
  tk_count += c
184
  title_w = float(parser_config.get("filename_embd_weight", 0.1))
185
- vects = (title_w * tts + (1-title_w) * cnts) if len(tts) == len(cnts) else cnts
 
186
 
187
  assert len(vects) == len(docs)
188
  for i, d in enumerate(docs):
@@ -192,7 +189,10 @@ def embedding(docs, mdl, parser_config={}):
192
 
193
 
194
  def main(comm, mod):
195
- tm_fnm = os.path.join(get_project_base_directory(), "rag/res", f"{comm}-{mod}.tm")
 
 
 
196
  tm = findMaxTm(tm_fnm)
197
  rows = collect(comm, mod, tm)
198
  if len(rows) == 0:
@@ -203,15 +203,13 @@ def main(comm, mod):
203
  callback = partial(set_progress, r["id"], r["from_page"], r["to_page"])
204
  try:
205
  embd_mdl = LLMBundle(r["tenant_id"], LLMType.EMBEDDING)
206
- cv_mdl = LLMBundle(r["tenant_id"], LLMType.IMAGE2TEXT)
207
- # TODO: sequence2text model
208
  except Exception as e:
209
  callback(prog=-1, msg=str(e))
210
  continue
211
 
212
- st_tm = timer()
213
- cks = build(r, cv_mdl)
214
- if cks is None:continue
215
  if not cks:
216
  tmf.write(str(r["update_time"]) + "\n")
217
  callback(1., "No chunk! Done!")
@@ -233,11 +231,15 @@ def main(comm, mod):
233
  cron_logger.error(str(es_r))
234
  else:
235
  if TaskService.do_cancel(r["id"]):
236
- ELASTICSEARCH.deleteByQuery(Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"]))
 
237
  continue
238
  callback(1., "Done!")
239
- DocumentService.increment_chunk_num(r["doc_id"], r["kb_id"], tk_count, chunk_count, 0)
240
- cron_logger.info("Chunk doc({}), token({}), chunks({})".format(r["id"], tk_count, len(cks)))
 
 
 
241
 
242
  tmf.write(str(r["update_time"]) + "\n")
243
  tmf.close()
 
21
  import copy
22
  import re
23
  import sys
24
+ import traceback
25
  from functools import partial
26
  from timeit import default_timer as timer
27
 
 
37
  from io import BytesIO
38
  import pandas as pd
39
 
40
+ from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture
41
 
42
  from api.db import LLMType, ParserType
43
  from api.db.services.document_service import DocumentService
 
57
  ParserType.QA.value: qa,
58
  ParserType.TABLE.value: table,
59
  ParserType.RESUME.value: resume,
60
+ ParserType.PICTURE.value: picture,
61
  }
62
 
63
 
64
def set_progress(task_id, from_page=0, to_page=-1,
                 prog=None, msg="Processing..."):
    """Store a progress message (and optionally a progress value) for a task.

    A negative ``prog`` prefixes the message with "[ERROR]". When
    ``TaskService.do_cancel(task_id)`` is truthy the message gets a
    " [Canceled]" suffix, the progress is forced to -1 and, after the
    update has been attempted, the process exits via ``sys.exit()``.
    When ``to_page`` is positive the message is prefixed with the page
    range. Failures of ``TaskService.update_progress`` are logged and
    otherwise ignored.

    :param task_id: id of the task to update.
    :param from_page: first page of the range shown in the message.
    :param to_page: last page of the range; no prefix unless > 0.
    :param prog: progress value, or None to leave it untouched.
    :param msg: message text.
    """
    failed = prog is not None and prog < 0
    if failed:
        msg = "[ERROR]" + msg

    cancel = TaskService.do_cancel(task_id)
    if cancel:
        msg += " [Canceled]"
        prog = -1

    if to_page > 0:
        msg = f"Page({from_page}~{to_page}): " + msg

    update = {"progress_msg": msg}
    if prog is not None:
        update["progress"] = prog

    try:
        TaskService.update_progress(task_id, update)
    except Exception as e:
        cron_logger.error("set_progress:({}), {}".format(task_id, str(e)))

    if cancel:
        sys.exit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  def collect(comm, mod, tm):
 
94
  return tasks
95
 
96
 
97
+ def build(row):
98
  if row["size"] > DOC_MAXIMUM_SIZE:
99
  set_progress(row["id"], prog=-1, msg="File size exceeds( <= %dMb )" %
100
  (int(DOC_MAXIMUM_SIZE / 1024 / 1024)))
101
  return []
102
 
103
+ callback = partial(
104
+ set_progress,
105
+ row["id"],
106
+ row["from_page"],
107
+ row["to_page"])
108
  chunker = FACTORY[row["parser_id"].lower()]
109
  try:
110
+ cron_logger.info(
111
+ "Chunkking {}/{}".format(row["location"], row["name"]))
112
+ cks = chunker.chunk(row["name"], binary=MINIO.get(row["kb_id"], row["location"]), from_page=row["from_page"],
113
+ to_page=row["to_page"], lang=row["language"], callback=callback,
114
+ kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
115
  except Exception as e:
116
  if re.search("(No such file|not found)", str(e)):
117
  callback(-1, "Can not find file <%s>" % row["doc_name"])
118
  else:
119
+ callback(-1, f"Internal server error: %s" %
120
+ str(e).replace("'", ""))
121
+ traceback.print_exc()
122
 
123
+ cron_logger.warn(
124
+ "Chunkking {}/{}: {}".format(row["location"], row["name"], str(e)))
125
 
126
  return
127
 
128
+ callback(msg="Finished slicing files(%d). Start to embedding the content."%len(cks))
129
 
130
  docs = []
131
  doc = {
 
136
  d = copy.deepcopy(doc)
137
  d.update(ck)
138
  md5 = hashlib.md5()
139
+ md5.update((ck["content_with_weight"] +
140
+ str(d["doc_id"])).encode("utf-8"))
141
  d["_id"] = md5.hexdigest()
142
  d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
143
  d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
 
168
 
169
 
170
  def embedding(docs, mdl, parser_config={}):
171
+ tts, cnts = [rmSpace(d["title_tks"]) for d in docs if d.get("title_tks")], [
172
+ d["content_with_weight"] for d in docs]
173
  tk_count = 0
174
  if len(tts) == len(cnts):
175
  tts, c = mdl.encode(tts)
 
178
  cnts, c = mdl.encode(cnts)
179
  tk_count += c
180
  title_w = float(parser_config.get("filename_embd_weight", 0.1))
181
+ vects = (title_w * tts + (1 - title_w) *
182
+ cnts) if len(tts) == len(cnts) else cnts
183
 
184
  assert len(vects) == len(docs)
185
  for i, d in enumerate(docs):
 
189
 
190
 
191
  def main(comm, mod):
192
+ tm_fnm = os.path.join(
193
+ get_project_base_directory(),
194
+ "rag/res",
195
+ f"{comm}-{mod}.tm")
196
  tm = findMaxTm(tm_fnm)
197
  rows = collect(comm, mod, tm)
198
  if len(rows) == 0:
 
203
  callback = partial(set_progress, r["id"], r["from_page"], r["to_page"])
204
  try:
205
  embd_mdl = LLMBundle(r["tenant_id"], LLMType.EMBEDDING)
 
 
206
  except Exception as e:
207
  callback(prog=-1, msg=str(e))
208
  continue
209
 
210
+ cks = build(r)
211
+ if cks is None:
212
+ continue
213
  if not cks:
214
  tmf.write(str(r["update_time"]) + "\n")
215
  callback(1., "No chunk! Done!")
 
231
  cron_logger.error(str(es_r))
232
  else:
233
  if TaskService.do_cancel(r["id"]):
234
+ ELASTICSEARCH.deleteByQuery(
235
+ Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"]))
236
  continue
237
  callback(1., "Done!")
238
+ DocumentService.increment_chunk_num(
239
+ r["doc_id"], r["kb_id"], tk_count, chunk_count, 0)
240
+ cron_logger.info(
241
+ "Chunk doc({}), token({}), chunks({})".format(
242
+ r["id"], tk_count, len(cks)))
243
 
244
  tmf.write(str(r["update_time"]) + "\n")
245
  tmf.close()