wony617 committed on
Commit
eaea5aa
·
unverified ·
1 Parent(s): 6c707e1

Support smolagent (#5)

Browse files

* Toctree logic update

* feat: Add AWS Bedrock integration and API provider selection

This commit introduces the following enhancements:

- **API Provider Selection**: Implemented a new feature allowing users to select between Anthropic and AWS Bedrock as their preferred API provider for translation tasks.
- **AWS Bedrock Integration**: Integrated AWS Bedrock as a translation backend, enabling the use of AWS Bedrock models via a bearer token.
- **Dynamic API Key Management**: Updated the configuration panel and backend logic to dynamically manage and apply API keys based on the selected provider, ensuring only the active provider's key is used.
- **Improved PR Generation**: Enhanced the default Pull Request generation with a more detailed and structured template, including a comprehensive checklist for reviewers.
- **Model Configuration**: Added `config.py` to centralize default model definitions.

These changes provide greater flexibility in choosing translation services and improve the overall user experience for PR generation.

agent/handler.py CHANGED
@@ -33,6 +33,7 @@ class ChatState:
33
  # Persistent settings (preserved across restarts)
34
  self.persistent_settings = {
35
  "anthropic_api_key": "",
 
36
  "github_config": {
37
  "token": "",
38
  "owner": "",
@@ -374,14 +375,29 @@ def update_language_selection(lang, history):
374
  return history, "", update_status(), lang
375
 
376
 
377
- def update_persistent_config(anthropic_key, github_token, github_owner, github_repo, reference_pr_url, history):
378
  """Update persistent configuration settings."""
379
  global state
380
 
381
- # Update API keys
382
- if anthropic_key:
383
  state.persistent_settings["anthropic_api_key"] = anthropic_key
384
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
  if github_token:
387
  os.environ["GITHUB_TOKEN"] = github_token
@@ -406,8 +422,11 @@ def update_persistent_config(anthropic_key, github_token, github_owner, github_r
406
  response = "✅ Configuration saved!"
407
  if github_owner and github_repo:
408
  response += f" GitHub: {github_owner}/{github_repo}"
409
- elif anthropic_key:
 
410
  response += " Anthropic API key updated."
 
 
411
 
412
  history.append(["Configuration update", response])
413
  return history, "", update_status()
@@ -457,12 +476,20 @@ def send_message(message, history):
457
  def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
458
  # Use persistent anthropic key
459
  anthropic_key = state.persistent_settings["anthropic_api_key"]
460
- if not anthropic_key:
461
- response = "❌ Please set Anthropic API key in Configuration panel first."
 
 
462
  history.append(["Translation request", response])
463
  return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
464
 
465
- os.environ["ANTHROPIC_API_KEY"] = anthropic_key
 
 
 
 
 
 
466
 
467
  # Check if file path is provided
468
  if not file_to_translate or not file_to_translate.strip():
@@ -602,6 +629,8 @@ def restart_handler(history):
602
  # Restore environment variables
603
  if backup_settings["anthropic_api_key"]:
604
  os.environ["ANTHROPIC_API_KEY"] = backup_settings["anthropic_api_key"]
 
 
605
  if backup_settings["github_config"]["token"]:
606
  os.environ["GITHUB_TOKEN"] = backup_settings["github_config"]["token"]
607
 
 
33
  # Persistent settings (preserved across restarts)
34
  self.persistent_settings = {
35
  "anthropic_api_key": "",
36
+ "aws_bearer_token_bedrock": "",
37
  "github_config": {
38
  "token": "",
39
  "owner": "",
 
375
  return history, "", update_status(), lang
376
 
377
 
378
+ def update_persistent_config(api_provider, anthropic_key, aws_bearer_token_bedrock, github_token, github_owner, github_repo, reference_pr_url, history):
379
  """Update persistent configuration settings."""
380
  global state
381
 
382
+ # Update API keys based on provider selection
383
+ if api_provider == "Anthropic":
384
  state.persistent_settings["anthropic_api_key"] = anthropic_key
385
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
386
+ # Clear AWS Bedrock token if Anthropic is selected
387
+ state.persistent_settings["aws_bearer_token_bedrock"] = ""
388
+ os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None)
389
+ elif api_provider == "AWS Bedrock":
390
+ state.persistent_settings["aws_bearer_token_bedrock"] = aws_bearer_token_bedrock
391
+ os.environ["AWS_BEARER_TOKEN_BEDROCK"] = aws_bearer_token_bedrock
392
+ # Clear Anthropic key if AWS Bedrock is selected
393
+ state.persistent_settings["anthropic_api_key"] = ""
394
+ os.environ.pop("ANTHROPIC_API_KEY", None)
395
+ else:
396
+ # If no provider is selected or unknown, clear both
397
+ state.persistent_settings["anthropic_api_key"] = ""
398
+ os.environ.pop("ANTHROPIC_API_KEY", None)
399
+ state.persistent_settings["aws_bearer_token_bedrock"] = ""
400
+ os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None)
401
 
402
  if github_token:
403
  os.environ["GITHUB_TOKEN"] = github_token
 
422
  response = "✅ Configuration saved!"
423
  if github_owner and github_repo:
424
  response += f" GitHub: {github_owner}/{github_repo}"
425
+
426
+ if api_provider == "Anthropic" and anthropic_key:
427
  response += " Anthropic API key updated."
428
+ elif api_provider == "AWS Bedrock" and aws_bearer_token_bedrock:
429
+ response += " AWS Bedrock Bearer Token updated."
430
 
431
  history.append(["Configuration update", response])
432
  return history, "", update_status()
 
476
  def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
477
  # Use persistent anthropic key
478
  anthropic_key = state.persistent_settings["anthropic_api_key"]
479
+ aws_bearer_token_bedrock = state.persistent_settings["aws_bearer_token_bedrock"]
480
+
481
+ if not anthropic_key and not aws_bearer_token_bedrock:
482
+ response = "❌ Please set either Anthropic API key or AWS Bearer Token for Bedrock in Configuration panel first."
483
  history.append(["Translation request", response])
484
  return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
485
 
486
+ # Set the active API key to environment variable for translator.content.py
487
+ if anthropic_key:
488
+ os.environ["ANTHROPIC_API_KEY"] = anthropic_key
489
+ os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None) # Ensure only one is active
490
+ elif aws_bearer_token_bedrock:
491
+ os.environ["AWS_BEARER_TOKEN_BEDROCK"] = aws_bearer_token_bedrock
492
+ os.environ.pop("ANTHROPIC_API_KEY", None) # Ensure only one is active
493
 
494
  # Check if file path is provided
495
  if not file_to_translate or not file_to_translate.strip():
 
629
  # Restore environment variables
630
  if backup_settings["anthropic_api_key"]:
631
  os.environ["ANTHROPIC_API_KEY"] = backup_settings["anthropic_api_key"]
632
+ if backup_settings["aws_bearer_token_bedrock"]:
633
+ os.environ["AWS_BEARER_TOKEN_BEDROCK"] = backup_settings["aws_bearer_token_bedrock"]
634
  if backup_settings["github_config"]["token"]:
635
  os.environ["GITHUB_TOKEN"] = backup_settings["github_config"]["token"]
636
 
agent/toctree_handler.py CHANGED
@@ -134,8 +134,10 @@ Korean title:"""
134
  english_title = self.find_title_for_local(en_toctree, target_local)
135
 
136
  if not english_title:
137
- print(f"Could not find English title for local: {target_local}")
138
- return ko_toctree
 
 
139
 
140
  print(f"Found English title: {english_title} for local: {target_local}")
141
 
@@ -153,10 +155,11 @@ Korean title:"""
153
  )
154
 
155
  if updated:
156
- print(f"Successfully updated translation entry: local={target_local}, title={korean_title}")
157
  return updated_toctree
158
  else:
159
- print(f"Could not find '(번역중) {english_title}' entry to update")
 
160
  return ko_toctree
161
 
162
  except Exception as e:
@@ -192,8 +195,8 @@ Korean title:"""
192
  # Get Korean toctree
193
  ko_toctree = self.get_ko_toctree()
194
 
195
- # Update Korean toctree with replacement logic
196
- updated_ko_toctree = self.create_updated_toctree_with_replacement(ko_toctree, filepath_without_prefix)
197
 
198
  if not updated_ko_toctree:
199
  print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
@@ -287,3 +290,130 @@ Korean title:"""
287
  "status": "error",
288
  "message": f"Error updating toctree: {str(e)}"
289
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  english_title = self.find_title_for_local(en_toctree, target_local)
135
 
136
  if not english_title:
137
+ print(f"⚠️ Toctree entry not found: '{target_local}' not in English toctree")
138
+ print(f"🔍 Attempting to find appropriate section for new entry...")
139
+ # Try to add new entry in appropriate location
140
+ return self.add_new_toctree_entry(ko_toctree, target_local)
141
 
142
  print(f"Found English title: {english_title} for local: {target_local}")
143
 
 
155
  )
156
 
157
  if updated:
158
+ print(f"Successfully updated translation entry: local={target_local}, title={korean_title}")
159
  return updated_toctree
160
  else:
161
+ print(f"⚠️ Toctree update skipped: '(번역중) {english_title}' entry not found")
162
+ print(f"📋 This may be a new file not yet added to Korean toctree")
163
  return ko_toctree
164
 
165
  except Exception as e:
 
195
  # Get Korean toctree
196
  ko_toctree = self.get_ko_toctree()
197
 
198
+ # Use diff-merge algorithm to add new entry
199
+ updated_ko_toctree = self.add_new_toctree_entry(ko_toctree, filepath_without_prefix)
200
 
201
  if not updated_ko_toctree:
202
  print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
 
290
  "status": "error",
291
  "message": f"Error updating toctree: {str(e)}"
292
  }
293
+
294
def add_new_toctree_entry(self, ko_toctree: list, target_local: str) -> list:
    """Return a copy of the Korean toctree with an entry for *target_local* added.

    The entry title is produced by ``self.translate_title`` from the last path
    component of *target_local* (underscores become spaces, then title-cased).
    Placement is delegated to ``self.merge_toctree_sections`` using the English
    toctree as the structural reference; when that reports no placement, the
    entry is appended at the root level instead.

    Args:
        ko_toctree: Current Korean toctree. It is deep-copied, never mutated.
        target_local: The ``local`` path of the document being added.

    Returns:
        The updated copy, or the original *ko_toctree* object if any step
        raised an exception (the error is printed, not re-raised).
    """
    try:
        from copy import deepcopy

        working_copy = deepcopy(ko_toctree)

        # Derive a human-readable name from the file part of the path.
        base_name = target_local.rsplit('/', 1)[-1]
        readable_name = base_name.replace('_', ' ').title()
        korean_title = self.translate_title(readable_name)
        new_entry = dict(local=target_local, title=korean_title)

        # The English toctree tells us which section the entry belongs to.
        english_toctree = self.get_en_toctree()
        placed = self.merge_toctree_sections(
            english_toctree, working_copy, target_local, new_entry
        )

        if not placed:
            # No section could host the entry: fall back to the root level.
            working_copy.append(new_entry)
            print(f"✅ Added new entry at root level: {target_local} -> {korean_title}")

        return working_copy

    except Exception as e:
        print(f"❌ Error adding new toctree entry: {e}")
        return ko_toctree
323
+
324
def merge_toctree_sections(self, en_toctree: list, ko_toctree: list, target_local: str, new_entry: dict) -> bool:
    """Place *new_entry* into the Korean section mirroring its English parent.

    Walks the top-level English sections and stops at the first one for which
    ``self.contains_target`` is true. If ``self.find_matching_section`` finds a
    Korean counterpart, the work is handed to ``self.merge_subsections`` and
    its result is returned. Otherwise a new section built by
    ``self.create_section_with_order`` is appended to *ko_toctree*.

    Returns:
        True when the entry was placed, False when no English section contains
        *target_local* (or ``merge_subsections`` reported failure).
    """
    for en_section in en_toctree:
        section_title = en_section.get('title')

        # Only the English section that actually holds the target is relevant.
        if not self.contains_target(en_section, target_local):
            continue

        counterpart = self.find_matching_section(ko_toctree, section_title)
        if counterpart:
            # Korean section already exists: slot the entry into it.
            return self.merge_subsections(en_section, counterpart, target_local, new_entry)

        # No Korean counterpart yet: create one holding just the new entry.
        new_ko_section = self.create_section_with_order(en_section, target_local, new_entry)
        ko_toctree.append(new_ko_section)
        print(f"✅ Created new section '{new_ko_section.get('title')}' with ordered structure")
        return True

    return False
344
+
345
def contains_target(self, section: dict, target_local: str) -> bool:
    """Report whether any descendant of *section* has ``local == target_local``.

    Only entries under ``section['sections']`` are inspected (recursively);
    the ``local`` of *section* itself is not compared.
    """
    if 'sections' not in section:
        return False

    return any(
        child.get('local') == target_local or self.contains_target(child, target_local)
        for child in section['sections']
    )
354
+
355
def find_matching_section(self, ko_toctree: list, en_title: str) -> dict:
    """Find the Korean top-level section corresponding to an English title.

    Matching is attempted in two passes: first an item whose ``title`` equals
    *en_title* verbatim, then an item whose ``title`` equals
    ``self.translate_title(en_title)``.

    Args:
        ko_toctree: Korean toctree items to search (non-dict items are skipped).
        en_title: Title of the English section.

    Returns:
        The matching item (the same object held by *ko_toctree*), or ``None``
        when nothing matches, *en_title* is empty, or translation fails.
    """
    # An empty/missing title would otherwise "match" any item lacking a title.
    if not en_title:
        return None

    candidates = [item for item in ko_toctree if isinstance(item, dict)]

    # Pass 1: the section title was left untranslated in the Korean toctree.
    for item in candidates:
        if item.get('title') == en_title:
            return item

    # Pass 2: compare against the translated title. Translation is best-effort:
    # a failure means "no match", but it is reported rather than hidden.
    try:
        translated_title = self.translate_title(en_title)
    except Exception as e:
        print(f"⚠️ Could not translate section title '{en_title}': {e}")
        return None

    if not translated_title:
        return None

    for item in candidates:
        if item.get('title') == translated_title:
            return item

    return None
372
+
373
def merge_subsections(self, en_section: dict, ko_section: dict, target_local: str, new_entry: dict) -> bool:
    """Insert *new_entry* into *ko_section* at the target's English position.

    The position is the index of the direct child of *en_section* whose
    ``local`` equals *target_local*; nested children are not searched here.
    *ko_section* gains an empty ``sections`` list if it had none, and the
    actual placement is done by ``self.insert_at_correct_position``.

    Returns:
        True after delegating the insertion; False when *en_section* has no
        ``sections`` key or none of its direct children is the target.
    """
    if 'sections' not in en_section:
        return False

    # Index of the target among the English section's direct children.
    target_index = next(
        (
            position
            for position, en_child in enumerate(en_section['sections'])
            if en_child.get('local') == target_local
        ),
        -1,
    )
    if target_index == -1:
        return False

    # The Korean section may be a bare title with no children yet.
    ko_section.setdefault('sections', [])

    self.insert_at_correct_position(ko_section, target_index, new_entry)
    return True
395
+
396
def insert_at_correct_position(self, ko_section: dict, target_index: int, new_entry: dict):
    """Place *new_entry* in ``ko_section['sections']`` at or near *target_index*.

    The list is modified in place and nothing is returned. Rules, in order:

    1. If an entry with the same ``local`` already exists, it is replaced
       where it stands (re-translating a file must not duplicate it).
    2. If the slot at *target_index* holds a ``None`` hole or a
       "(번역중) ..." placeholder entry, that slot is overwritten.
    3. Otherwise the entry is inserted at *target_index*, clamped to the list
       bounds, so existing translated entries are shifted rather than lost.

    The previous implementation always assigned ``sections[target_index]``,
    which destroyed whatever real entry lived there, and padded short lists
    with ``None`` values that were left behind in the middle of the list.

    Args:
        ko_section: Korean section dict; must already contain ``'sections'``.
        target_index: Index of the entry within the English section.
        new_entry: The ``{'local': ..., 'title': ...}`` entry to place.
    """
    sections = ko_section['sections']
    new_local = new_entry.get('local')

    # Rule 1: refresh an entry that is already present.
    if new_local is not None:
        for position, existing in enumerate(sections):
            if isinstance(existing, dict) and existing.get('local') == new_local:
                sections[position] = new_entry
                return

    # Rule 2: take over a hole or an "in translation" placeholder slot.
    if 0 <= target_index < len(sections):
        occupant = sections[target_index]
        is_placeholder = occupant is None or (
            isinstance(occupant, dict)
            and str(occupant.get('title', '')).startswith('(번역중)')
        )
        if is_placeholder:
            sections[target_index] = new_entry
            return

    # Rule 3: real insertion; clamping means a short Korean list is simply
    # appended to instead of being padded with None.
    insert_at = min(max(target_index, 0), len(sections))
    sections.insert(insert_at, new_entry)
410
+
411
def create_section_with_order(self, en_section: dict, target_local: str, new_entry: dict) -> dict:
    """Build a new Korean section whose only child is *new_entry*.

    The section title is ``self.translate_title`` applied to the English
    section's ``title``; ``isExpanded`` is copied from the English section and
    defaults to False. *target_local* is accepted but not used.
    """
    translated_heading = self.translate_title(en_section.get('title'))
    expanded = en_section.get('isExpanded', False)

    return {
        'title': translated_heading,
        'isExpanded': expanded,
        # Other English children are deliberately not copied over.
        'sections': [new_entry],
    }
agent/workflow.py CHANGED
@@ -261,30 +261,58 @@ def generate_github_pr(
261
  {result["message"]}"""
262
 
263
  elif result["status"] == "partial_success":
264
- return f"""⚠️ **Partial Success**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
 
 
 
266
  🌿 **Branch:** {result["branch"]}
267
  📁 **File:** {result["file_path"]}{toctree_status}
268
 
269
  {result["message"]}
270
 
271
  **Error Details:**
272
- {result.get("error_details", "Unknown error")}"""
 
 
 
 
 
273
 
274
  else:
275
  error_details = result.get("error_details", "No additional details")
276
  return f"""❌ **GitHub PR Creation Failed**
277
 
 
 
 
 
278
  **Error Message:**
279
  {result["message"]}
280
 
281
  **Error Details:**
282
  {error_details}
283
 
284
- 💡 **Common Solutions:**
285
- 1. **Project Mismatch**: Selected project '{project}' but fork is '{github_config.get('repo_name', 'REPO')}' - ensure they match
286
- 2. Check if your GitHub fork exists: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
287
- 3. Verify GitHub token has write access to your fork"""
288
 
289
  except Exception as e:
290
  error_msg = f"""❌ **Unexpected Error During PR Creation**
 
261
  {result["message"]}"""
262
 
263
  elif result["status"] == "partial_success":
264
+ error_details = result.get("error_details", "Unknown error")
265
+
266
+ # Check if it's "existing PR" case (not really an error)
267
+ if "Existing PR found" in error_details:
268
+ existing_pr_url = error_details.split(": ")[-1] if ": " in error_details else "Unknown"
269
+ return f"""🔄 **Translation Updated Successfully**
270
+
271
+ 🎯 **Selected Project:** {project}
272
+ 🌿 **Branch:** {result["branch"]}
273
+ 📁 **File:** {result["file_path"]}{toctree_status}
274
+
275
+ 🔗 **Existing PR Updated:** {existing_pr_url}
276
+
277
+ ✅ Your translation has been added to the existing PR. The file and toctree have been successfully updated!"""
278
+ else:
279
+ # Actual error case
280
+ return f"""⚠️ **Partial Success**
281
 
282
+ 🎯 **Selected Project:** {project}
283
+ 🏠 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
284
+ 🎯 **Target Base:** {base_owner}/{base_repo}
285
  🌿 **Branch:** {result["branch"]}
286
  📁 **File:** {result["file_path"]}{toctree_status}
287
 
288
  {result["message"]}
289
 
290
  **Error Details:**
291
+ {error_details}
292
+
293
+ 💡 **Project-Repository Mismatch Check:**
294
+ - Selected project '{project}' should match repository '{github_config.get('repo_name', 'REPO')}'
295
+ - For smolagents: use Jwaminju/smolagents fork
296
+ - For transformers: use Jwaminju/transformers fork"""
297
 
298
  else:
299
  error_details = result.get("error_details", "No additional details")
300
  return f"""❌ **GitHub PR Creation Failed**
301
 
302
+ 🎯 **Selected Project:** {project}
303
+ 🏠 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
304
+ 🎯 **Target Base:** {base_owner}/{base_repo}
305
+
306
  **Error Message:**
307
  {result["message"]}
308
 
309
  **Error Details:**
310
  {error_details}
311
 
312
+ 💡 **Project-Repository Mismatch:**
313
+ Selected project '{project}' but configured repository '{github_config.get('repo_name', 'REPO')}'
314
+ For smolagents project: use 'smolagents' repository
315
+ For transformers project: use 'transformers' repository"""
316
 
317
  except Exception as e:
318
  error_msg = f"""❌ **Unexpected Error During PR Creation**
app.py CHANGED
@@ -138,10 +138,23 @@ with gr.Blocks(
138
  gr.Markdown("### ⚙️ Configuration")
139
 
140
  with gr.Accordion("🔧 API & GitHub Settings", open=True):
 
 
 
 
 
 
141
  config_anthropic_key = gr.Textbox(
142
  label="🔑 Anthropic API Key",
143
  type="password",
144
  placeholder="sk-ant-...",
 
 
 
 
 
 
 
145
  )
146
  config_github_token = gr.Textbox(
147
  label="🔑 GitHub Token (Required for PR, Optional for file search)",
@@ -316,10 +329,20 @@ with gr.Blocks(
316
  # Configuration Save
317
  save_config_btn.click(
318
  fn=update_persistent_config,
319
- inputs=[config_anthropic_key, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
320
  outputs=[chatbot, msg_input, status_display],
321
  )
322
 
 
 
 
 
 
 
 
 
 
 
323
  approve_btn.click(
324
  fn=approve_handler,
325
  inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
 
138
  gr.Markdown("### ⚙️ Configuration")
139
 
140
  with gr.Accordion("🔧 API & GitHub Settings", open=True):
141
+ api_provider_radio = gr.Radio(
142
+ ["Anthropic", "AWS Bedrock"],
143
+ label="Select API Provider",
144
+ value="Anthropic", # Default selection
145
+ interactive=True,
146
+ )
147
  config_anthropic_key = gr.Textbox(
148
  label="🔑 Anthropic API Key",
149
  type="password",
150
  placeholder="sk-ant-...",
151
+ visible=True, # Initially visible as Anthropic is default
152
+ )
153
+ config_aws_bearer_token_bedrock = gr.Textbox(
154
+ label="🔑 AWS Bearer Token for Bedrock",
155
+ type="password",
156
+ placeholder="AWS_BEARER_TOKEN_BEDROCK",
157
+ visible=False, # Initially hidden
158
  )
159
  config_github_token = gr.Textbox(
160
  label="🔑 GitHub Token (Required for PR, Optional for file search)",
 
329
  # Configuration Save
330
  save_config_btn.click(
331
  fn=update_persistent_config,
332
+ inputs=[api_provider_radio, config_anthropic_key, config_aws_bearer_token_bedrock, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
333
  outputs=[chatbot, msg_input, status_display],
334
  )
335
 
336
+ # API Provider selection handler
337
+ api_provider_radio.change(
338
+ fn=lambda provider: (
339
+ gr.update(visible=True) if provider == "Anthropic" else gr.update(visible=False),
340
+ gr.update(visible=True) if provider == "AWS Bedrock" else gr.update(visible=False),
341
+ ),
342
+ inputs=[api_provider_radio],
343
+ outputs=[config_anthropic_key, config_aws_bearer_token_bedrock],
344
+ )
345
+
346
  approve_btn.click(
347
  fn=approve_handler,
348
  inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.py
2
+
3
+ # Default model list
4
+ default_models = [
5
+ "Helsinki-NLP/opus-mt-ko-en",
6
+ "Helsinki-NLP/opus-mt-tc-big-en-ko",
7
+ "davidkim205/iris-7b",
8
+ "maywell/Synatra-7B-v0.3-Translation",
9
+ "CUSTOM_MODEL_INPUT" # Placeholder for custom model input
10
+ ]
pr_generator/agent.py CHANGED
@@ -11,6 +11,7 @@ from typing import Optional, Dict, List, Tuple, Any
11
 
12
  # Load environment variables from .env file
13
  from dotenv import load_dotenv
 
14
 
15
  load_dotenv()
16
 
@@ -259,8 +260,8 @@ class GitHubPRAgent:
259
  ) -> str:
260
  """Generate text using LLM."""
261
  try:
262
- response = self.llm.invoke(prompt)
263
- generated = response.content.strip()
264
  print(f"LLM generated {operation}: {generated}")
265
  return generated
266
  except Exception as e:
@@ -271,25 +272,10 @@ class GitHubPRAgent:
271
  def generate_branch_name_from_reference(
272
  self, reference_branch_name: str, target_language: str, file_name: str
273
  ) -> str:
274
- """Use LLM to analyze reference PR information and generate appropriate branch name."""
275
- prompt = f"""Here is the reference PR information:
276
-
277
- Reference PR branch name: {reference_branch_name}
278
-
279
- Now I need to generate a branch name for a new translation task:
280
- - Target language: {target_language}
281
- - File to translate: {file_name}
282
-
283
- Please analyze the pattern and style of the reference PR title to generate a consistent new branch name.
284
-
285
- Requirements:
286
- 1. Follow the naming conventions and patterns of the reference PR
287
- 2. Appropriately reflect the target language ({target_language}) and file name ({file_name}) if applicable
288
-
289
- Please return only the branch name. No other explanation is needed."""
290
-
291
- fallback = f"translate-{target_language}-{file_name.replace('_', '-')}"
292
- return self._generate_with_llm(prompt, fallback, "branch name")
293
 
294
  def generate_pr_content_from_reference(
295
  self,
@@ -328,8 +314,8 @@ Title: [PR title here]
328
  Body: [PR body here, maintaining the exact markdown format and structure of the original]"""
329
 
330
  try:
331
- response = self.llm.invoke(prompt)
332
- generated_content = response.content.strip()
333
 
334
  # Separate title and body from response
335
  lines = generated_content.split("\n")
@@ -370,47 +356,46 @@ Body: [PR body here, maintaining the exact markdown format and structure of the
370
  self, target_language: str, filepath: str, target_filepath: str, file_name: str
371
  ) -> Tuple[str, str]:
372
  """Generate default PR content."""
373
- title = f"[i18n-{target_language}] Add {target_language} translation for {file_name}"
374
- body = f"""## Summary
375
- Add {target_language} translation for `{filepath}`.
376
-
377
- ## Changes
378
- - Add {target_language} translation: `{target_filepath}`
379
- - Original file: `{filepath}`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  """
381
  return title, body
382
 
383
  def generate_commit_message_from_reference(
384
  self, commit_messages: List[str], target_language: str, file_name: str
385
  ) -> str:
386
- """Use LLM to analyze reference PR commit messages and generate appropriate commit message."""
387
- commits_text = (
388
- "\n".join([f"- {msg}" for msg in commit_messages])
389
- if commit_messages
390
- else "None"
391
- )
392
-
393
- prompt = f"""Here are the commit messages from the reference PR:
394
-
395
- {commits_text}
396
-
397
- Now I need to generate a commit message for a new translation task:
398
- - Target language: {target_language}
399
- - File to translate: {file_name}
400
-
401
- Please analyze the commit message patterns and style of the reference PR to generate a consistent new commit message.
402
-
403
- Requirements:
404
- 1. Follow the commit message style and format of the reference PR
405
- 2. Appropriately reflect the target language ({target_language}) and file name ({file_name})
406
- 3. Follow general Git commit message conventions
407
- 4. Be concise and clear
408
- 5. If you detect typos in the given commit messages, use corrected versions (e.g., dos -> docs)
409
-
410
- Please return only the commit message. No other explanation is needed."""
411
-
412
- fallback = f"docs: add {target_language} translation for {file_name}"
413
- return self._generate_with_llm(prompt, fallback, "commit message")
414
 
415
  def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
416
  """Get information about an existing branch."""
@@ -452,7 +437,7 @@ Please return only the commit message. No other explanation is needed."""
452
 
453
  # 2. Generate translation file path and branch name
454
  target_filepath = filepath.replace("/en/", f"/{target_language}/")
455
- file_name = filepath.split("/")[-1].replace(".md", "")
456
 
457
  print(f"🌿 Generating branch name...")
458
  branch_name = self.generate_branch_name_from_reference(
 
11
 
12
  # Load environment variables from .env file
13
  from dotenv import load_dotenv
14
+ from translator.content import llm_translate
15
 
16
  load_dotenv()
17
 
 
260
  ) -> str:
261
  """Generate text using LLM."""
262
  try:
263
+ _usage_info, generated = llm_translate(prompt)
264
+ generated = generated.strip()
265
  print(f"LLM generated {operation}: {generated}")
266
  return generated
267
  except Exception as e:
 
272
  def generate_branch_name_from_reference(
273
  self, reference_branch_name: str, target_language: str, file_name: str
274
  ) -> str:
275
+ """Generate branch name using simple template."""
276
+ # Keep .md extension and make branch-safe
277
+ branch_safe_name = file_name.replace('_', '-')
278
+ return f"{target_language}-{branch_safe_name}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
  def generate_pr_content_from_reference(
281
  self,
 
314
  Body: [PR body here, maintaining the exact markdown format and structure of the original]"""
315
 
316
  try:
317
+ _usage_info, generated_content = llm_translate(prompt)
318
+ generated_content = generated_content.strip()
319
 
320
  # Separate title and body from response
321
  lines = generated_content.split("\n")
 
356
  self, target_language: str, filepath: str, target_filepath: str, file_name: str
357
  ) -> Tuple[str, str]:
358
  """Generate default PR content."""
359
+ title = f"🌐 [i18n-{target_language}] Translated `{file_name}` to {target_language}"
360
+ body = f"""# What does this PR do?
361
+
362
+ Translated the `{filepath}` file of the documentation to {target_language} 😄
363
+ Thank you in advance for your review!
364
+
365
+ Part of https://github.com/huggingface/transformers/issues/20179
366
+
367
+ ## Before reviewing
368
+ - [x] Check for missing / redundant translations (번역 누락/중복 검사)
369
+ - [x] Grammar Check (맞춤법 검사)
370
+ - [x] Review or Add new terms to glossary (용어 확인 및 추가)
371
+ - [x] Check Inline TOC (e.g. `[[lowercased-header]]`)
372
+ - [x] Check live-preview for gotchas (live-preview로 정상작동 확인)
373
+
374
+ ## Who can review? (Initial)
375
+ {target_language} translation reviewers
376
+
377
+ ## Before submitting
378
+ - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
379
+ - [x] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
380
+ Pull Request section?
381
+ - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link
382
+ to it if that's the case.
383
+ - [x] Did you make sure to update the documentation with your changes? Here are the
384
+ [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and
385
+ [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
386
+ - [ ] Did you write any new necessary tests?
387
+
388
+ ## Who can review? (Final)
389
+ May you please review this PR?
390
+ Documentation maintainers
391
  """
392
  return title, body
393
 
394
  def generate_commit_message_from_reference(
395
  self, commit_messages: List[str], target_language: str, file_name: str
396
  ) -> str:
397
+ """Generate simple commit message using template."""
398
+ return f"docs: {target_language}: {file_name}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
401
  """Get information about an existing branch."""
 
437
 
438
  # 2. Generate translation file path and branch name
439
  target_filepath = filepath.replace("/en/", f"/{target_language}/")
440
+ file_name = filepath.split("/")[-1] # Keep .md extension
441
 
442
  print(f"🌿 Generating branch name...")
443
  branch_name = self.generate_branch_name_from_reference(
translator/content.py CHANGED
@@ -1,9 +1,12 @@
 
1
  import re
2
  import string
3
 
4
  import requests
5
  from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
 
 
7
 
8
  from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
9
  from translator.project_config import get_project_config
@@ -167,10 +170,45 @@ def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
167
 
168
 
169
  def llm_translate(to_translate: str) -> tuple[str, str]:
170
- with get_openai_callback() as cb:
 
 
 
 
171
  model = ChatAnthropic(
172
  model="claude-sonnet-4-20250514", max_tokens=64000, streaming=True
173
  )
174
  ai_message = model.invoke(to_translate)
175
- print("cb:", cb)
176
- return str(cb), ai_message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import re
3
  import string
4
 
5
  import requests
6
  from langchain.callbacks import get_openai_callback
7
  from langchain_anthropic import ChatAnthropic
8
+ import boto3
9
+ import json
10
 
11
  from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
12
  from translator.project_config import get_project_config
 
170
 
171
 
172
  def llm_translate(to_translate: str) -> tuple[str, str]:
173
+ anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
174
+ aws_bearer_token_bedrock = os.environ.get("AWS_BEARER_TOKEN_BEDROCK")
175
+
176
+ if anthropic_api_key:
177
+ # Use Anthropic API Key
178
  model = ChatAnthropic(
179
  model="claude-sonnet-4-20250514", max_tokens=64000, streaming=True
180
  )
181
  ai_message = model.invoke(to_translate)
182
+ cb = "Anthropic API Key used"
183
+ return str(cb), ai_message.content
184
+
185
+ elif aws_bearer_token_bedrock:
186
+ # Use AWS Bedrock with bearer token (assuming standard AWS credential chain is configured)
187
+ # Note: boto3 does not directly use a 'bearer_token' named environment variable for SigV4 authentication.
188
+ # It relies on AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, or IAM roles.
189
+ # If AWS_BEARER_TOKEN_BEDROCK is meant to be one of these, it should be renamed accordingly.
190
+ # For now, we proceed assuming standard AWS credential chain is configured to pick up credentials.
191
+ client = boto3.client("bedrock-runtime", region_name="eu-north-1")
192
+
193
+ body = {
194
+ "messages": [
195
+ {"role": "user", "content": to_translate}
196
+ ],
197
+ "max_tokens": 128000,
198
+ "anthropic_version": "bedrock-2023-05-31"
199
+ }
200
+
201
+ response = client.invoke_model(
202
+ modelId="arn:aws:bedrock:eu-north-1:235729104418:inference-profile/eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
203
+ contentType="application/json",
204
+ accept="application/json",
205
+ body=json.dumps(body),
206
+ )
207
+ result = json.loads(response["body"].read())
208
+ cb = result["usage"]
209
+ ai_message = result["content"][0]["text"]
210
+
211
+ return str(cb), ai_message
212
+
213
+ else:
214
+ raise ValueError("No API key found for translation. Please set ANTHROPIC_API_KEY or AWS_BEARER_TOKEN_BEDROCK environment variable.")