wony617 committed on
Commit
159b6fa
·
1 Parent(s): 9e33f2c

Add pr duplication check

Browse files
README.md CHANGED
@@ -54,7 +54,7 @@ This project was specifically created to solve [Hugging Face Transformers Issue
54
 
55
  ## 🎥 Demo Video
56
 
57
- [![Hugging Face i18n Agent Demo](https://img.youtube.com/vi/YOUR_VIDEO_ID/maxresdefault.jpg)](https://www.youtube.com/watch?v=YOUR_VIDEO_ID)
58
 
59
  *Watch the complete walkthrough: from setup to PR creation in under 5 minutes*
60
 
 
54
 
55
  ## 🎥 Demo Video
56
 
57
+ [Hugging Face i18n Agent Demo](https://youtu.be/J2MBMNk7la8?si=7867ztaU2nPN0UEo)
58
 
59
  *Watch the complete walkthrough: from setup to PR creation in under 5 minutes*
60
 
agent/handler.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
 
9
  from agent.workflow import (
10
  report_translation_target_files,
 
11
  translate_docs_interactive,
12
  generate_github_pr,
13
  )
@@ -70,22 +71,29 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
70
  state.step = "find_files"
71
 
72
  status_report, files_list = report_translation_target_files(lang, k)
73
- state.files_to_translate = [file[0] for file in files_list] if files_list else []
 
 
 
 
 
 
 
74
 
75
  response = f"""**✅ File search completed!**
76
 
77
  **Status Report:**
78
  {status_report}
79
-
80
  **📁 Found first {len(state.files_to_translate)} files to translate:**
81
  """
82
 
83
  if state.files_to_translate:
84
- for i, file in enumerate(state.files_to_translate[:5], 1): # Show first 5
85
  response += f"\n{i}. `{file}`"
86
 
87
- if len(state.files_to_translate) > 5:
88
- response += f"\n... and {len(state.files_to_translate) - 5} more files"
89
 
90
  response += "\n\n**🚀 Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
91
  else:
@@ -96,7 +104,18 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
96
  cleared_input = ""
97
  selected_tab = 1 if state.files_to_translate else 0
98
 
99
- return history, cleared_input, update_status(), gr.Tabs(selected=selected_tab)
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  def start_translation_process():
@@ -124,18 +143,19 @@ def start_translation_process():
124
  original_file_link = (
125
  "https://github.com/huggingface/transformers/blob/main/" + current_file
126
  )
 
 
 
127
  response = (
128
- f"""🔄 Translation for: `{current_file}`**\n"""
129
  "**📄 Original Content Link:**\n"
130
  ""
131
  f"{original_file_link}\n"
132
  "**🌐 Translated Content:**\n"
133
- f"\n```\n\n{_extract_content_for_display(translated)}```\n"
134
- f"{status}\n"
 
135
  )
136
- print("translated:")
137
- print(translated)
138
- print("extracted")
139
 
140
  except Exception as e:
141
  response = f"❌ Translation failed: {str(e)}"
@@ -294,8 +314,10 @@ def send_message(message, history):
294
 
295
 
296
  # Button handlers with tab switching
297
- def start_translate_handler(history, anthropic_key):
298
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
 
 
299
  new_hist, cleared_input = handle_user_message("start translation", history)
300
  selected_tabs = 2 if state.current_file_content["translated"] else 0
301
  return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
 
8
 
9
  from agent.workflow import (
10
  report_translation_target_files,
11
+ report_in_translation_status_files,
12
  translate_docs_interactive,
13
  generate_github_pr,
14
  )
 
71
  state.step = "find_files"
72
 
73
  status_report, files_list = report_translation_target_files(lang, k)
74
+ in_progress_status_report, in_progress_docs = report_in_translation_status_files(
75
+ lang
76
+ )
77
+ state.files_to_translate = (
78
+ [file[0] for file in files_list if file[0] not in in_progress_docs]
79
+ if files_list
80
+ else []
81
+ )
82
 
83
  response = f"""**✅ File search completed!**
84
 
85
  **Status Report:**
86
  {status_report}
87
+ {in_progress_status_report}
88
  **📁 Found first {len(state.files_to_translate)} files to translate:**
89
  """
90
 
91
  if state.files_to_translate:
92
+ for i, file in enumerate(state.files_to_translate, 1):
93
  response += f"\n{i}. `{file}`"
94
 
95
+ # if len(state.files_to_translate) > 5:
96
+ # response += f"\n... and {len(state.files_to_translate) - 5} more files"
97
 
98
  response += "\n\n**🚀 Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
99
  else:
 
104
  cleared_input = ""
105
  selected_tab = 1 if state.files_to_translate else 0
106
 
107
+ # Also return the file list so it can be used as the dropdown choices
108
+ return (
109
+ history,
110
+ cleared_input,
111
+ update_status(),
112
+ gr.Tabs(selected=selected_tab),
113
+ update_dropdown_choices(state.files_to_translate),
114
+ )
115
+
116
+
117
def update_dropdown_choices(file_list):
    """Build a Gradio update that swaps in a new set of selectable files.

    Args:
        file_list: Values to pass as the component's new ``choices``.

    Returns:
        The object produced by ``gr.update`` with ``choices`` set to
        ``file_list`` and ``value`` set to ``None``.
    """
    refreshed_choices = gr.update(choices=file_list, value=None)
    return refreshed_choices
119
 
120
 
121
  def start_translation_process():
 
143
  original_file_link = (
144
  "https://github.com/huggingface/transformers/blob/main/" + current_file
145
  )
146
+ print("Compeleted translation:\n")
147
+ print(translated)
148
+ print("----------------------------")
149
  response = (
150
+ f"""🔄 Translation for: `{current_file}`\n"""
151
  "**📄 Original Content Link:**\n"
152
  ""
153
  f"{original_file_link}\n"
154
  "**🌐 Translated Content:**\n"
155
+ f"\n```\n\n{_extract_content_for_display(translated)}\n```"
156
+ # f"{status}\n"
157
+ # "✅ Translation completed. The code block will be added when generating PR."
158
  )
 
 
 
159
 
160
  except Exception as e:
161
  response = f"❌ Translation failed: {str(e)}"
 
314
 
315
 
316
  # Button handlers with tab switching
317
+ def start_translate_handler(history, anthropic_key, file_to_translate):
318
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
319
+
320
+ state.files_to_translate = [file_to_translate]
321
  new_hist, cleared_input = handle_user_message("start translation", history)
322
  selected_tabs = 2 if state.current_file_content["translated"] else 0
323
  return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
agent/workflow.py CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
11
  llm_translate,
12
  preprocess_content,
13
  )
14
- from translator.retriever import report
15
 
16
  # GitHub PR Agent import
17
  try:
@@ -38,6 +38,19 @@ def report_translation_target_files(
38
  return status_report, [[file] for file in filepath_list]
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
42
  """Translate documentation."""
43
  # step 1. Get content from file path
@@ -49,13 +62,17 @@ def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
49
  translation_lang = "Korean"
50
  to_translate_with_prompt = get_full_prompt(translation_lang, to_translate)
51
 
 
 
52
  # step 3. Translate with LLM
53
  # TODO: hand this step over to the MCP client
54
  callback_result, translated_content = llm_translate(to_translate_with_prompt)
55
-
 
56
  # step 4. Add scaffold to translation result
57
  translated_doc = fill_scaffold(content, to_translate, translated_content)
58
-
 
59
  return callback_result, translated_doc
60
 
61
 
@@ -149,9 +166,7 @@ def generate_github_pr(
149
  print(f" 📁 File: {filepath}")
150
  print(f" 🌍 Language: {target_language}")
151
  print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
152
- print(
153
- f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}"
154
- )
155
 
156
  agent = GitHubPRAgent()
157
  result = agent.run_translation_pr_workflow(
 
11
  llm_translate,
12
  preprocess_content,
13
  )
14
+ from translator.retriever import report, get_github_issue_open_pr
15
 
16
  # GitHub PR Agent import
17
  try:
 
38
  return status_report, [[file] for file in filepath_list]
39
 
40
 
41
def report_in_translation_status_files(translate_lang: str) -> tuple[str, list[str]]:
    """Summarize the docs that already have an open translation PR.

    Args:
        translate_lang: Language code forwarded to ``get_github_issue_open_pr``.

    Returns:
        A ``(status_report, docs)`` tuple. ``status_report`` is an empty
        string when no docs are reported; otherwise it is a numbered list
        pairing each doc path with its PR entry. ``docs`` is the list of doc
        paths exactly as returned by ``get_github_issue_open_pr``.
    """
    docs, pr_info_list = get_github_issue_open_pr(translate_lang)

    status_report = ""
    if docs:
        status_report = f"""\n🤖 Found {len(docs)} in progress for translation.
        """
        # Pair entries with zip() rather than pr_info_list[i] so a shorter
        # pr_info_list cannot raise IndexError while building the report.
        for position, (file, pr_info) in enumerate(zip(docs, pr_info_list), start=1):
            status_report += f"\n{position}. `{file}`: {pr_info}"
        status_report += "\n"
    return status_report, docs
52
+
53
+
54
  def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
55
  """Translate documentation."""
56
  # step 1. Get content from file path
 
62
  translation_lang = "Korean"
63
  to_translate_with_prompt = get_full_prompt(translation_lang, to_translate)
64
 
65
+ print("to_translate_with_prompt:\n", to_translate_with_prompt)
66
+
67
  # step 3. Translate with LLM
68
  # TODO: hand this step over to the MCP client
69
  callback_result, translated_content = llm_translate(to_translate_with_prompt)
70
+ print("translated_content:\n")
71
+ print(translated_content)
72
  # step 4. Add scaffold to translation result
73
  translated_doc = fill_scaffold(content, to_translate, translated_content)
74
+ print("translated_doc:\n")
75
+ print(translated_doc)
76
  return callback_result, translated_doc
77
 
78
 
 
166
  print(f" 📁 File: {filepath}")
167
  print(f" 🌍 Language: {target_language}")
168
  print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
169
+ print(f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}")
 
 
170
 
171
  agent = GitHubPRAgent()
172
  result = agent.run_translation_pr_workflow(
app.py CHANGED
@@ -44,6 +44,8 @@ css = """
44
  backdrop-filter: blur(8px);
45
  border: 1px solid rgba(255,255,180,0.25);
46
  width: 100%;
 
 
47
  }
48
  .status-card {
49
  width: 100%
@@ -91,7 +93,6 @@ css = """
91
  with gr.Blocks(
92
  css=css, title=" 🌐 Hugging Face Transformers Docs i18n made easy"
93
  ) as demo:
94
-
95
  # Title
96
  with open("images/hfkr_logo.png", "rb") as img_file:
97
  base64_img = base64.b64encode(img_file.read()).decode()
@@ -122,16 +123,15 @@ with gr.Blocks(
122
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
123
  with gr.TabItem("1. Find Files", id=0):
124
  with gr.Group():
125
- lang_dropdown = gr.Dropdown(
126
  choices=[language.value for language in Languages],
127
  label="🌍 Translate To",
128
  value="ko",
129
  )
130
  k_input = gr.Number(
131
  label="📊 First k missing translated docs",
132
- value=1,
133
  minimum=1,
134
- maximum=100,
135
  )
136
  find_btn = gr.Button(
137
  "🔍 Find Files to Translate",
@@ -140,6 +140,17 @@ with gr.Blocks(
140
 
141
  with gr.TabItem("2. Translate", id=1):
142
  with gr.Group():
 
 
 
 
 
 
 
 
 
 
 
143
  translate_lang_display = gr.Dropdown(
144
  choices=[language.value for language in Languages],
145
  label="🌍 Translation Language",
@@ -186,7 +197,7 @@ with gr.Blocks(
186
 
187
  # Chat Controller
188
  with gr.Column(elem_classes=["control-panel"]):
189
- gr.Markdown("### 💬 Chat with agent")
190
  msg_input = gr.Textbox(
191
  placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
192
  container=False,
@@ -199,7 +210,7 @@ with gr.Blocks(
199
  find_btn.click(
200
  fn=process_file_search_handler,
201
  inputs=[lang_dropdown, k_input, chatbot],
202
- outputs=[chatbot, msg_input, status_display, control_tabs],
203
  )
204
 
205
  # Sync language across tabs
@@ -209,10 +220,17 @@ with gr.Blocks(
209
  outputs=[translate_lang_display],
210
  )
211
 
 
 
 
 
 
 
 
212
  # Button event handlers
213
  start_translate_btn.click(
214
  fn=start_translate_handler,
215
- inputs=[chatbot, anthropic_key],
216
  outputs=[chatbot, msg_input, status_display, control_tabs],
217
  )
218
 
 
44
  backdrop-filter: blur(8px);
45
  border: 1px solid rgba(255,255,180,0.25);
46
  width: 100%;
47
+ overflow: visible !important;
48
+
49
  }
50
  .status-card {
51
  width: 100%
 
93
  with gr.Blocks(
94
  css=css, title=" 🌐 Hugging Face Transformers Docs i18n made easy"
95
  ) as demo:
 
96
  # Title
97
  with open("images/hfkr_logo.png", "rb") as img_file:
98
  base64_img = base64.b64encode(img_file.read()).decode()
 
123
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
124
  with gr.TabItem("1. Find Files", id=0):
125
  with gr.Group():
126
+ lang_dropdown = gr.Radio(
127
  choices=[language.value for language in Languages],
128
  label="🌍 Translate To",
129
  value="ko",
130
  )
131
  k_input = gr.Number(
132
  label="📊 First k missing translated docs",
133
+ value=10,
134
  minimum=1,
 
135
  )
136
  find_btn = gr.Button(
137
  "🔍 Find Files to Translate",
 
140
 
141
  with gr.TabItem("2. Translate", id=1):
142
  with gr.Group():
143
+ files_to_translate = gr.Radio(
144
+ choices=[],
145
+ label="📄 Select a file to translate",
146
+ interactive=True,
147
+ value=[],
148
+ )
149
+ file_to_translate_input = gr.Textbox(
150
+ label="🌍 Select in the dropdown or write the file path to translate",
151
+ value="",
152
+ )
153
+
154
  translate_lang_display = gr.Dropdown(
155
  choices=[language.value for language in Languages],
156
  label="🌍 Translation Language",
 
197
 
198
  # Chat Controller
199
  with gr.Column(elem_classes=["control-panel"]):
200
+ gr.Markdown("### 💬 Chat with agent (Only simple chat is available)")
201
  msg_input = gr.Textbox(
202
  placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
203
  container=False,
 
210
  find_btn.click(
211
  fn=process_file_search_handler,
212
  inputs=[lang_dropdown, k_input, chatbot],
213
+ outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
214
  )
215
 
216
  # Sync language across tabs
 
220
  outputs=[translate_lang_display],
221
  )
222
 
223
+ #
224
+ files_to_translate.change(
225
+ fn=lambda x: x,
226
+ inputs=[files_to_translate],
227
+ outputs=[file_to_translate_input],
228
+ )
229
+
230
  # Button event handlers
231
  start_translate_btn.click(
232
  fn=start_translate_handler,
233
+ inputs=[chatbot, anthropic_key, file_to_translate_input],
234
  outputs=[chatbot, msg_input, status_display, control_tabs],
235
  )
236
 
translation_result/docs/source/en/accelerator_selection.md CHANGED
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
16
 
17
  # 가속기 선택 [[accelerator-selection]]
18
 
19
- 분산 훈련 중에 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 연산 성능을 가진 가속기가 있고 더 빠른 가속기를 먼저 사용하고 싶을 유용할 수 있습니다. 또는 사용 가능한 가속기 일부만 사용할 수도 있습니다. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed) 필요하지 않습니다.
20
 
21
  이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다.
22
 
@@ -27,7 +27,7 @@ rendered properly in your Markdown viewer.
27
  <hfoptions id="select-accelerator">
28
  <hfoption id="torchrun">
29
 
30
- `--nproc_per_node`를 사용하여 사용할 가속기 수를 선택하세요.
31
 
32
  ```bash
33
  torchrun --nproc_per_node=2 trainer-program.py ...
@@ -36,7 +36,7 @@ torchrun --nproc_per_node=2 trainer-program.py ...
36
  </hfoption>
37
  <hfoption id="Accelerate">
38
 
39
- `--num_processes`를 사용하여 사용할 가속기 수를 선택하세요.
40
 
41
  ```bash
42
  accelerate launch --num_processes 2 trainer-program.py ...
@@ -45,7 +45,7 @@ accelerate launch --num_processes 2 trainer-program.py ...
45
  </hfoption>
46
  <hfoption id="DeepSpeed">
47
 
48
- `--num_gpus`를 사용하여 사용할 GPU 수를 선택하세요.
49
 
50
  ```bash
51
  deepspeed --num_gpus 2 trainer-program.py ...
@@ -55,7 +55,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
55
  </hfoptions>
56
 
57
  ## 가속기 순서 [[order-of-accelerators]]
58
- 사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 각 실행마다 명령줄에서 설정되는 경우가 많지만, `~/.bashrc`나 다른 시작 설정 파일에 추가할 수도 있습니다.
59
 
60
  예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면:
61
 
@@ -66,7 +66,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
66
  CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
67
  ```
68
 
69
- GPU 0과 2만 PyTorch "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다.
70
  순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용):
71
 
72
 
@@ -80,15 +80,15 @@ GPU 없이 실행하려면:
80
  CUDA_VISIBLE_DEVICES= python trainer-program.py ...
81
  ```
82
 
83
- `CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치 순서를 제어할 수도 있습니다:
84
 
85
- - PCIe 버스 ID 순서로 정렬 (`nvidia-smi`와 일치):
86
 
87
  ```bash
88
  $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
89
  ```
90
 
91
- - 연산 성능 순서로 정렬 (가장 빠른 것부터):
92
 
93
  ```bash
94
  export CUDA_DEVICE_ORDER=FASTEST_FIRST
@@ -101,7 +101,7 @@ $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
101
  ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
102
  ```
103
 
104
- XPU 0과 2만 PyTorch "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다.
105
  순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용):
106
 
107
  ```bash
@@ -109,13 +109,13 @@ ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
109
  ```
110
 
111
 
112
- 다음으로 Intel XPU 순서를 제어할 수도 있습니다:
113
 
114
  ```bash
115
  export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
116
  ```
117
 
118
- Intel XPU 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요.
119
 
120
  </hfoption>
121
  </hfoptions>
@@ -123,5 +123,5 @@ Intel XPU의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero]
123
 
124
 
125
  > [!WARNING]
126
- > 환경 변수는 명령줄에 추가하는 대신 export할 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 결국 잘못된 가속기를 사용하게 될 수 있어 혼란스러울 수 있으므로 권장하지 않습니다. 대신, 동일한 명령줄에서 특정 훈련 실행에 대해 환경 변수를 설정하는 것이 일반적인 관례입니다.
127
  ```
 
16
 
17
  # 가속기 선택 [[accelerator-selection]]
18
 
19
+ 분산 학습 중에는 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 컴퓨팅 성능을 가진 가속기가 있을 때 더 빠른 가속기를 먼저 사용하고 싶은 경우에 유용할 수 있습니다. 또는 사용 가능한 가속기의 일부만 사용할 수도 있습니다. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed)은 필요하지 않습니다.
20
 
21
  이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다.
22
 
 
27
  <hfoptions id="select-accelerator">
28
  <hfoption id="torchrun">
29
 
30
+ `--nproc_per_node`를 사용하여 사용할 가속기 수를 선택합니다.
31
 
32
  ```bash
33
  torchrun --nproc_per_node=2 trainer-program.py ...
 
36
  </hfoption>
37
  <hfoption id="Accelerate">
38
 
39
+ `--num_processes`를 사용하여 사용할 가속기 수를 선택합니다.
40
 
41
  ```bash
42
  accelerate launch --num_processes 2 trainer-program.py ...
 
45
  </hfoption>
46
  <hfoption id="DeepSpeed">
47
 
48
+ `--num_gpus`를 사용하여 사용할 GPU 수를 선택합니다.
49
 
50
  ```bash
51
  deepspeed --num_gpus 2 trainer-program.py ...
 
55
  </hfoptions>
56
 
57
  ## 가속기 순서 [[order-of-accelerators]]
58
+ 사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 종종 실행에 대해 명령줄에서 설정되지만, `~/.bashrc`나 다른 시작 구성 파일에 추가할 수도 있습니다.
59
 
60
  예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면:
61
 
 
66
  CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
67
  ```
68
 
69
+ GPU 0과 2만 PyTorch에서 "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다.
70
  순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용):
71
 
72
 
 
80
  CUDA_VISIBLE_DEVICES= python trainer-program.py ...
81
  ```
82
 
83
+ `CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치의 순서를 제어할 수도 있습니다:
84
 
85
+ - PCIe 버스 ID 순서 (`nvidia-smi`와 일치):
86
 
87
  ```bash
88
  $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
89
  ```
90
 
91
+ - 컴퓨팅 성능 순서 (가장 빠른 것부터):
92
 
93
  ```bash
94
  export CUDA_DEVICE_ORDER=FASTEST_FIRST
 
101
  ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
102
  ```
103
 
104
+ XPU 0과 2만 PyTorch에서 "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다.
105
  순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용):
106
 
107
  ```bash
 
109
  ```
110
 
111
 
112
+ 다음을 사용하여 Intel XPU 순서를 제어할 수도 있습니다:
113
 
114
  ```bash
115
  export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
116
  ```
117
 
118
+ Intel XPU에서의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요.
119
 
120
  </hfoption>
121
  </hfoptions>
 
123
 
124
 
125
  > [!WARNING]
126
+ > 환경 변수는 명령줄에 추가하는 대신 내보낼 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 잘못된 가속기를 사용하게 될 수 있어 혼란을 야기할 수 있으므로 권장하지 않습니다. 대신, 같은 명령줄에서 특정 훈련 실행을 위해 환경 변수를 설정하는 것이 일반적인 관례입니다.
127
  ```
translator/content.py CHANGED
@@ -5,6 +5,8 @@ import requests
5
  from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
7
 
 
 
8
 
9
  def get_content(filepath: str) -> str:
10
  url = string.Template(
@@ -38,10 +40,11 @@ def get_full_prompt(language: str, to_translate: str) -> str:
38
  "What do these sentences about Hugging Face Transformers "
39
  "(a machine learning library) mean in $language? "
40
  "Please do not translate the word after a 🤗 emoji "
41
- "as it is a product name. Output only the translated markdown result "
42
- "without any explanations or introductions.\n\n```md"
 
43
  ).safe_substitute(language=language)
44
- return "\n".join([prompt, to_translate.strip(), "```"])
45
 
46
 
47
  def split_markdown_sections(markdown: str) -> list:
@@ -64,15 +67,23 @@ def make_scaffold(content: str, to_translate: str) -> string.Template:
64
  scaffold = content
65
  for i, text in enumerate(to_translate.split("\n\n")):
66
  scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
 
 
67
  return string.Template(scaffold)
68
 
69
 
70
  def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
71
  scaffold = make_scaffold(content, to_translate)
 
 
72
  divided = split_markdown_sections(to_translate)
 
 
73
  anchors = get_anchors(divided)
74
 
75
  translated = split_markdown_sections(translated)
 
 
76
 
77
  translated[1::3] = [
78
  f"{korean_title} {anchors[i]}"
 
5
  from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
7
 
8
+ from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
9
+
10
 
11
  def get_content(filepath: str) -> str:
12
  url = string.Template(
 
40
  "What do these sentences about Hugging Face Transformers "
41
  "(a machine learning library) mean in $language? "
42
  "Please do not translate the word after a 🤗 emoji "
43
+ "as it is a product name. Output the complete markdown file**, with prose translated and all other content intact"
44
+ "No explanations or extras—only the translated markdown"
45
+ "\n\n```md"
46
  ).safe_substitute(language=language)
47
+ return "\n".join([prompt, to_translate.strip(), "```", PROMPT_WITH_GLOSSARY])
48
 
49
 
50
  def split_markdown_sections(markdown: str) -> list:
 
67
  scaffold = content
68
  for i, text in enumerate(to_translate.split("\n\n")):
69
  scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
70
+ print("inner scaffold:")
71
+ print(scaffold)
72
  return string.Template(scaffold)
73
 
74
 
75
  def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
76
  scaffold = make_scaffold(content, to_translate)
77
+ print("scaffold:")
78
+ print(scaffold.template)
79
  divided = split_markdown_sections(to_translate)
80
+ print("divided:")
81
+ print(divided)
82
  anchors = get_anchors(divided)
83
 
84
  translated = split_markdown_sections(translated)
85
+ print("translated:")
86
+ print(translated)
87
 
88
  translated[1::3] = [
89
  f"{korean_title} {anchors[i]}"
translator/retriever.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  from pathlib import Path
3
 
@@ -25,6 +26,44 @@ def get_github_repo_files():
25
  return file_paths
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
29
  """
30
  Retrieve missing docs
 
1
+ import re
2
  import os
3
  from pathlib import Path
4
 
 
26
  return file_paths
27
 
28
 
29
def get_github_issue_open_pr(lang: str = "ko") -> tuple[list[str], list[str]]:
    """List docs that already have an open '🌐 [i18n-KO]' translation PR.

    Queries the open pull requests of ``huggingface/transformers`` and keeps
    those whose title starts with ``🌐 [i18n-KO]`` and contains a backticked
    ``.md`` file name.

    Args:
        lang: Target language code. Only ``"ko"`` is supported.

    Returns:
        A ``(filenames, pr_info_list)`` tuple of equal length.
        ``filenames[i]`` is the doc path (prefixed with ``docs/source/en/``)
        taken from the PR title and ``pr_info_list[i]`` is the web URL of that
        same PR.

    Raises:
        ValueError: If ``lang`` is anything other than ``"ko"``.
        Exception: If the GitHub API does not answer with HTTP 200.
    """
    if lang != "ko":
        raise ValueError(
            "No Github issue has been registered to the server. (Only 'ko' is supported - please contact us to support this.)"
        )

    # NOTE(review): no pagination parameters are sent, so only the first page
    # of open PRs returned by the API is inspected — confirm that is enough.
    url = "https://api.github.com/repos/huggingface/transformers/pulls?state=open"
    headers = {
        "Accept": "application/vnd.github+json",
    }
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        raise Exception(f"GitHub API error: {response.status_code} {response.text}")

    pattern = re.compile(r"`([^`]+\.md)`")

    filenames: list[str] = []
    pr_info_list: list[str] = []
    for pr in response.json():
        title = pr["title"]
        if not title.startswith("🌐 [i18n-KO]"):
            continue
        match = pattern.search(title)
        if match is None:
            # Skip PRs without a recognizable file name in BOTH lists so the
            # two results stay index-aligned for callers that pair them up.
            continue
        filenames.append("docs/source/en/" + match.group(1))
        # The PR number is the last path segment of the API URL. It is pulled
        # out before formatting so no quotes are nested inside the f-string
        # (a SyntaxError before Python 3.12).
        pr_number = pr["url"].rstrip("/").split("/")[-1]
        pr_info_list.append(
            f"https://github.com/huggingface/transformers/pull/{pr_number}"
        )
    return filenames, pr_info_list
65
+
66
+
67
  def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
68
  """
69
  Retrieve missing docs