wony617 committed · Commit 159b6fa · 1 Parent: 9e33f2c
Add PR duplication check

Browse files:
- README.md +1 -1
- agent/handler.py +35 -13
- agent/workflow.py +21 -6
- app.py +25 -7
- translation_result/docs/source/en/accelerator_selection.md +13 -13
- translator/content.py +14 -3
- translator/retriever.py +39 -0
README.md CHANGED
@@ -54,7 +54,7 @@ This project was specifically created to solve [Hugging Face Transformers Issue
 
 ## 🎥 Demo Video
 
-[
+[Hugging Face i18n Agent Demo](https://youtu.be/J2MBMNk7la8?si=7867ztaU2nPN0UEo)
 
 *Watch the complete walkthrough: from setup to PR creation in under 5 minutes*
 
agent/handler.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
 
 from agent.workflow import (
     report_translation_target_files,
+    report_in_translation_status_files,
     translate_docs_interactive,
     generate_github_pr,
 )
@@ -70,22 +71,29 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
     state.step = "find_files"
 
     status_report, files_list = report_translation_target_files(lang, k)
-
+    in_progress_status_report, in_progress_docs = report_in_translation_status_files(
+        lang
+    )
+    state.files_to_translate = (
+        [file[0] for file in files_list if file[0] not in in_progress_docs]
+        if files_list
+        else []
+    )
 
     response = f"""**✅ File search completed!**
 
 **Status Report:**
 {status_report}
-
+{in_progress_status_report}
 **📁 Found first {len(state.files_to_translate)} files to translate:**
 """
 
     if state.files_to_translate:
-        for i, file in enumerate(state.files_to_translate):
+        for i, file in enumerate(state.files_to_translate, 1):
             response += f"\n{i}. `{file}`"
 
-        if len(state.files_to_translate) > 5:
-            response += f"\n... and {len(state.files_to_translate) - 5} more files"
+        # if len(state.files_to_translate) > 5:
+        #     response += f"\n... and {len(state.files_to_translate) - 5} more files"
 
         response += "\n\n**🚀 Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
     else:
@@ -96,7 +104,18 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
     cleared_input = ""
     selected_tab = 1 if state.files_to_translate else 0
 
-    return history, cleared_input, update_status(), gr.Tabs(selected=selected_tab)
+    # Also return the file list to use as the dropdown choices
+    return (
+        history,
+        cleared_input,
+        update_status(),
+        gr.Tabs(selected=selected_tab),
+        update_dropdown_choices(state.files_to_translate),
+    )
+
+
+def update_dropdown_choices(file_list):
+    return gr.update(choices=file_list, value=None)
 
 
 def start_translation_process():
@@ -124,18 +143,19 @@ def start_translation_process():
         original_file_link = (
             "https://github.com/huggingface/transformers/blob/main/" + current_file
         )
+        print("Completed translation:\n")
+        print(translated)
+        print("----------------------------")
         response = (
-            f"""🔄 Translation for: `{current_file}
+            f"""🔄 Translation for: `{current_file}`\n"""
             "**📄 Original Content Link:**\n"
             ""
             f"{original_file_link}\n"
             "**🌐 Translated Content:**\n"
-            f"\n```\n\n{_extract_content_for_display(translated)}
-            f"{status}\n"
+            f"\n```\n\n{_extract_content_for_display(translated)}\n```"
+            # f"{status}\n"
+            # "✅ Translation completed. The code block will be added when generating PR."
         )
-        print("translated:")
-        print(translated)
-        print("extracted")
 
     except Exception as e:
         response = f"❌ Translation failed: {str(e)}"
@@ -294,8 +314,10 @@ def send_message(message, history):
 
 
 # Button handlers with tab switching
-def start_translate_handler(history, anthropic_key):
+def start_translate_handler(history, anthropic_key, file_to_translate):
     os.environ["ANTHROPIC_API_KEY"] = anthropic_key
+
+    state.files_to_translate = [file_to_translate]
     new_hist, cleared_input = handle_user_message("start translation", history)
     selected_tabs = 2 if state.current_file_content["translated"] else 0
     return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
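Note: the net effect of the handler change above is a set difference between the candidate files and the docs that already have an open PR. A minimal standalone sketch of that filtering step, using made-up paths rather than real repository data:

```python
# files_list mirrors report_translation_target_files' return shape: [[path], ...].
# in_progress_docs is the flat list returned by report_in_translation_status_files.
files_list = [
    ["docs/source/en/accelerator_selection.md"],
    ["docs/source/en/trainer.md"],
]
in_progress_docs = ["docs/source/en/trainer.md"]

# Same expression as in process_file_search_handler: skip docs already in flight.
files_to_translate = (
    [file[0] for file in files_list if file[0] not in in_progress_docs]
    if files_list
    else []
)
print(files_to_translate)  # ['docs/source/en/accelerator_selection.md']
```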
agent/workflow.py CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
     llm_translate,
     preprocess_content,
 )
-from translator.retriever import report
+from translator.retriever import report, get_github_issue_open_pr
 
 # GitHub PR Agent import
 try:
@@ -38,6 +38,19 @@ def report_translation_target_files(
     return status_report, [[file] for file in filepath_list]
 
 
+def report_in_translation_status_files(translate_lang: str) -> tuple[str, list[str]]:
+    docs, pr_info_list = get_github_issue_open_pr(translate_lang)
+
+    status_report = ""
+    if docs:
+        status_report = f"""\n🤖 Found {len(docs)} docs already in progress for translation.
+"""
+        for i, file in enumerate(docs):
+            status_report += f"\n{i+1}. `{file}`: {pr_info_list[i]}"
+        status_report += "\n"
+    return status_report, docs
+
+
 def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
     """Translate documentation."""
     # step 1. Get content from file path
@@ -49,13 +62,17 @@ def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
     translation_lang = "Korean"
     to_translate_with_prompt = get_full_prompt(translation_lang, to_translate)
 
+    print("to_translate_with_prompt:\n", to_translate_with_prompt)
+
     # step 3. Translate with LLM
     # TODO: hand this part off to the MCP client
     callback_result, translated_content = llm_translate(to_translate_with_prompt)
-
+    print("translated_content:\n")
+    print(translated_content)
     # step 4. Add scaffold to translation result
     translated_doc = fill_scaffold(content, to_translate, translated_content)
-
+    print("translated_doc:\n")
+    print(translated_doc)
     return callback_result, translated_doc
 
 
@@ -149,9 +166,7 @@ def generate_github_pr(
     print(f" 📁 File: {filepath}")
     print(f" 🌍 Language: {target_language}")
     print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
-    print(
-        f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}"
-    )
+    print(f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}")
 
     agent = GitHubPRAgent()
     result = agent.run_translation_pr_workflow(
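Note: a short usage sketch of the new status helper (it makes a live GitHub API call, and only "ko" is wired up; the printed values are illustrative):

```python
from agent.workflow import report_in_translation_status_files

# Returns a human-readable summary plus the list of docs that already have
# an open "🌐 [i18n-KO]" PR, which the handler uses to skip duplicate work.
status_report, in_progress_docs = report_in_translation_status_files("ko")
print(status_report)
print(in_progress_docs)  # e.g. ['docs/source/en/trainer.md', ...]
```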
app.py CHANGED
@@ -44,6 +44,8 @@ css = """
     backdrop-filter: blur(8px);
     border: 1px solid rgba(255,255,180,0.25);
     width: 100%;
+    overflow: visible !important;
+
 }
 .status-card {
     width: 100%
@@ -91,7 +93,6 @@ css = """
 with gr.Blocks(
     css=css, title=" 🌐 Hugging Face Transformers Docs i18n made easy"
 ) as demo:
-
     # Title
     with open("images/hfkr_logo.png", "rb") as img_file:
         base64_img = base64.b64encode(img_file.read()).decode()
@@ -122,16 +123,15 @@ with gr.Blocks(
     with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
         with gr.TabItem("1. Find Files", id=0):
             with gr.Group():
-                lang_dropdown = gr.Dropdown(
+                lang_dropdown = gr.Radio(
                     choices=[language.value for language in Languages],
                     label="🌍 Translate To",
                     value="ko",
                 )
                 k_input = gr.Number(
                     label="📊 First k missing translated docs",
-                    value=
+                    value=10,
                     minimum=1,
-                    maximum=100,
                 )
                 find_btn = gr.Button(
                     "🔍 Find Files to Translate",
@@ -140,6 +140,17 @@ with gr.Blocks(
 
         with gr.TabItem("2. Translate", id=1):
             with gr.Group():
+                files_to_translate = gr.Radio(
+                    choices=[],
+                    label="📄 Select a file to translate",
+                    interactive=True,
+                    value=[],
+                )
+                file_to_translate_input = gr.Textbox(
+                    label="🌍 Select in the dropdown or write the file path to translate",
+                    value="",
+                )
+
                 translate_lang_display = gr.Dropdown(
                     choices=[language.value for language in Languages],
                     label="🌍 Translation Language",
@@ -186,7 +197,7 @@ with gr.Blocks(
 
     # Chat Controller
     with gr.Column(elem_classes=["control-panel"]):
-        gr.Markdown("### 💬 Chat with agent")
+        gr.Markdown("### 💬 Chat with agent (Only simple chat is available)")
         msg_input = gr.Textbox(
             placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
             container=False,
@@ -199,7 +210,7 @@ with gr.Blocks(
     find_btn.click(
         fn=process_file_search_handler,
         inputs=[lang_dropdown, k_input, chatbot],
-        outputs=[chatbot, msg_input, status_display, control_tabs],
+        outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
     )
 
     # Sync language across tabs
@@ -209,10 +220,17 @@ with gr.Blocks(
         outputs=[translate_lang_display],
    )
 
+    # Mirror the radio selection into the file path textbox
+    files_to_translate.change(
+        fn=lambda x: x,
+        inputs=[files_to_translate],
+        outputs=[file_to_translate_input],
+    )
+
     # Button event handlers
     start_translate_btn.click(
         fn=start_translate_handler,
-        inputs=[chatbot, anthropic_key],
+        inputs=[chatbot, anthropic_key, file_to_translate_input],
         outputs=[chatbot, msg_input, status_display, control_tabs],
     )
 
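Note: the "2. Translate" radio starts with empty choices and is populated at runtime through gr.update. A minimal sketch of that pattern outside the app (assuming a recent Gradio where event handlers may return gr.update; the populate button is a hypothetical stand-in for the real find_btn):

```python
import gradio as gr

def update_dropdown_choices(file_list):
    # Swap the component's choices in place and clear any prior selection.
    return gr.update(choices=file_list, value=None)

with gr.Blocks() as demo:
    files_to_translate = gr.Radio(choices=[], label="📄 Select a file to translate")
    file_to_translate_input = gr.Textbox(label="File path to translate")
    populate_btn = gr.Button("Populate")

    # Hypothetical search result; the app feeds state.files_to_translate here.
    populate_btn.click(
        fn=lambda: update_dropdown_choices(["docs/source/en/trainer.md"]),
        outputs=[files_to_translate],
    )
    # Mirror the radio selection into the textbox, as app.py wires it.
    files_to_translate.change(
        fn=lambda x: x,
        inputs=[files_to_translate],
        outputs=[file_to_translate_input],
    )

# demo.launch()
```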
translation_result/docs/source/en/accelerator_selection.md CHANGED
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
 
 # 가속기 선택 [[accelerator-selection]]
 
-분산
+분산 학습 중에는 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 컴퓨팅 성능을 가진 가속기가 있을 때 더 빠른 가속기를 먼저 사용하고 싶은 경우에 유용할 수 있습니다. 또는 사용 가능한 가속기의 일부만 사용할 수도 있습니다. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed)는 필요하지 않습니다.
 
 이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다.
 
@@ -27,7 +27,7 @@ rendered properly in your Markdown viewer.
 <hfoptions id="select-accelerator">
 <hfoption id="torchrun">
 
-`--nproc_per_node`를 사용하여 사용할 가속기 수를
+`--nproc_per_node`를 사용하여 사용할 가속기 수를 선택합니다.
 
 ```bash
 torchrun --nproc_per_node=2 trainer-program.py ...
@@ -36,7 +36,7 @@ torchrun --nproc_per_node=2 trainer-program.py ...
 </hfoption>
 <hfoption id="Accelerate">
 
-`--num_processes`를 사용하여 사용할 가속기 수를
+`--num_processes`를 사용하여 사용할 가속기 수를 선택합니다.
 
 ```bash
 accelerate launch --num_processes 2 trainer-program.py ...
@@ -45,7 +45,7 @@ accelerate launch --num_processes 2 trainer-program.py ...
 </hfoption>
 <hfoption id="DeepSpeed">
 
-`--num_gpus`를 사용하여 사용할 GPU 수를
+`--num_gpus`를 사용하여 사용할 GPU 수를 선택합니다.
 
 ```bash
 deepspeed --num_gpus 2 trainer-program.py ...
@@ -55,7 +55,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
 </hfoptions>
 
 ## 가속기 순서 [[order-of-accelerators]]
-사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 각
+사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 종종 각 실행에 대해 명령줄에서 설정되지만, `~/.bashrc`나 다른 시작 구성 파일에 추가할 수도 있습니다.
 
 예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면:
 
@@ -66,7 +66,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
 CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
 ```
 
-GPU 0과 2만 PyTorch
+GPU 0과 2만 PyTorch에서 "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다.
 순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용):
 
 
@@ -80,15 +80,15 @@ GPU 없이 실행하려면:
 CUDA_VISIBLE_DEVICES= python trainer-program.py ...
 ```
 
-`CUDA_DEVICE_ORDER`를 사용하여 CUDA
+`CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치의 순서를 제어할 수도 있습니다:
 
-- PCIe 버스 ID
+- PCIe 버스 ID 순서 (`nvidia-smi`와 일치):
 
 ```bash
 $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
 ```
 
--
+- 컴퓨팅 성능 순서 (가장 빠른 것부터):
 
 ```bash
 export CUDA_DEVICE_ORDER=FASTEST_FIRST
@@ -101,7 +101,7 @@ $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
 ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
 ```
 
-XPU 0과 2만 PyTorch
+XPU 0과 2만 PyTorch에서 "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다.
 순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용):
 
 ```bash
@@ -109,13 +109,13 @@ ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
 ```
 
 
-
+다음을 사용하여 Intel XPU의 순서를 제어할 수도 있습니다:
 
 ```bash
 export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
 ```
 
-Intel XPU
+Intel XPU에서의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요.
 
 </hfoption>
 </hfoptions>
@@ -123,5 +123,5 @@ Intel XPU의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero]
 
 
 > [!WARNING]
-> 환경 변수는 명령줄에 추가하는 대신
+> 환경 변수는 명령줄에 추가하는 대신 내보낼 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 잘못된 가속기를 사용하게 될 수 있어 혼란을 야기할 수 있으므로 권장하지 않습니다. 대신, 같은 명령줄에서 특정 훈련 실행을 위해 환경 변수를 설정하는 것이 일반적인 관례입니다.
 ```
translator/content.py CHANGED
@@ -5,6 +5,8 @@ import requests
 from langchain.callbacks import get_openai_callback
 from langchain_anthropic import ChatAnthropic
 
+from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
+
 
 def get_content(filepath: str) -> str:
     url = string.Template(
@@ -38,10 +40,11 @@ def get_full_prompt(language: str, to_translate: str) -> str:
         "What do these sentences about Hugging Face Transformers "
         "(a machine learning library) mean in $language? "
         "Please do not translate the word after a 🤗 emoji "
-        "as it is a product name. Output
-        "
+        "as it is a product name. Output the complete markdown file, with prose translated and all other content intact. "
+        "No explanations or extras; only the translated markdown."
+        "\n\n```md"
     ).safe_substitute(language=language)
-    return "\n".join([prompt, to_translate.strip(), "```"])
+    return "\n".join([prompt, to_translate.strip(), "```", PROMPT_WITH_GLOSSARY])
 
 
 def split_markdown_sections(markdown: str) -> list:
@@ -64,15 +67,23 @@ def make_scaffold(content: str, to_translate: str) -> string.Template:
     scaffold = content
     for i, text in enumerate(to_translate.split("\n\n")):
         scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
+    print("inner scaffold:")
+    print(scaffold)
     return string.Template(scaffold)
 
 
 def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
     scaffold = make_scaffold(content, to_translate)
+    print("scaffold:")
+    print(scaffold.template)
     divided = split_markdown_sections(to_translate)
+    print("divided:")
+    print(divided)
     anchors = get_anchors(divided)
 
     translated = split_markdown_sections(translated)
+    print("translated:")
+    print(translated)
 
     translated[1::3] = [
         f"{korean_title} {anchors[i]}"
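Note: make_scaffold and fill_scaffold lean on string.Template placeholders to protect non-prose content during translation. A toy round-trip under that scheme (toy strings; the substitution step is a plausible reconstruction, since fill_scaffold's tail is not shown in this diff):

```python
import string

content = "# Title\n\nHello world.\n\nAnother paragraph."
to_translate = "Hello world.\n\nAnother paragraph."

# make_scaffold's core: replace each translatable chunk with an indexed placeholder.
scaffold = content
for i, text in enumerate(to_translate.split("\n\n")):
    scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
template = string.Template(scaffold)
print(template.template)
# -> "# Title\n\n$hf_i18n_placeholder0\n\n$hf_i18n_placeholder1"

# Plausible fill step: substitute translated chunks back into the scaffold.
translated_chunks = ["안녕하세요 세계.", "또 다른 단락."]
filled = template.safe_substitute(
    {f"hf_i18n_placeholder{i}": t for i, t in enumerate(translated_chunks)}
)
print(filled)
# -> "# Title\n\n안녕하세요 세계.\n\n또 다른 단락."
```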
translator/retriever.py CHANGED
@@ -1,3 +1,4 @@
+import re
 import os
 from pathlib import Path
 
@@ -25,6 +26,44 @@ def get_github_repo_files():
     return file_paths
 
 
+def get_github_issue_open_pr(lang: str = "ko"):
+    """
+    Get open PRs for the tracking GitHub issue, filtered by titles starting with '🌐 [i18n-KO]'.
+    """
+    if lang == "ko":
+        issue_id = "20179"
+    else:
+        raise ValueError(
+            "No GitHub issue has been registered to the server. (Only 'ko' is supported - please contact us to support this.)"
+        )
+
+    url = "https://api.github.com/repos/huggingface/transformers/pulls?state=open"
+
+    headers = {
+        "Accept": "application/vnd.github+json",
+    }
+    response = requests.get(url, headers=headers)
+
+    if response.status_code != 200:
+        raise Exception(f"GitHub API error: {response.status_code} {response.text}")
+
+    open_prs = response.json()
+    filtered_prs = [pr for pr in open_prs if pr["title"].startswith("🌐 [i18n-KO]")]
+
+    pattern = re.compile(r"`([^`]+\.md)`")
+
+    filenames = [
+        "docs/source/en/" + match.group(1)
+        for pr in filtered_prs
+        if (match := pattern.search(pr["title"]))
+    ]
+    pr_info_list = [
+        f"https://github.com/huggingface/transformers/pull/{pr['url'].rstrip('/').split('/')[-1]}"
+        for pr in filtered_prs
+    ]
+    return filenames, pr_info_list
+
+
 def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
     """
     Retrieve missing docs
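Note: a usage sketch of the new retriever helper (live, unauthenticated GitHub API call, so only the first page of open PRs is returned; the example output is illustrative):

```python
from translator.retriever import get_github_issue_open_pr

# Filters open transformers PRs titled like "🌐 [i18n-KO] Translated `some_doc.md` ..."
# and maps each back to its English source path plus the PR URL.
filenames, pr_info_list = get_github_issue_open_pr("ko")
for path, pr_url in zip(filenames, pr_info_list):
    print(f"{path} -> {pr_url}")
# e.g. docs/source/en/trainer.md -> https://github.com/huggingface/transformers/pull/12345
```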