Support smolagents (#5)
Browse files
* Toctree logic update
* feat: Add AWS Bedrock integration and API provider selection
This commit introduces the following enhancements:
- **API Provider Selection**: Implemented a new feature allowing users to select between Anthropic and AWS Bedrock as their preferred API provider for translation tasks.
- **AWS Bedrock Integration**: Integrated AWS Bedrock as a translation backend, enabling the use of AWS Bedrock models via a bearer token.
- **Dynamic API Key Management**: Updated the configuration panel and backend logic to dynamically manage and apply API keys based on the selected provider, ensuring only the active provider's key is used.
- **Improved PR Generation**: Enhanced the default Pull Request generation with a more detailed and structured template, including a comprehensive checklist for reviewers.
- **Model Configuration**: Added `config.py` to centralize default model definitions.
These changes provide greater flexibility in choosing translation services and improve the overall user experience for PR generation.
- agent/handler.py +36 -7
- agent/toctree_handler.py +136 -6
- agent/workflow.py +34 -6
- app.py +24 -1
- config.py +10 -0
- pr_generator/agent.py +44 -59
- translator/content.py +41 -3
@@ -33,6 +33,7 @@ class ChatState:
|
|
33 |
# Persistent settings (preserved across restarts)
|
34 |
self.persistent_settings = {
|
35 |
"anthropic_api_key": "",
|
|
|
36 |
"github_config": {
|
37 |
"token": "",
|
38 |
"owner": "",
|
@@ -374,14 +375,29 @@ def update_language_selection(lang, history):
|
|
374 |
return history, "", update_status(), lang
|
375 |
|
376 |
|
377 |
-
def update_persistent_config(anthropic_key, github_token, github_owner, github_repo, reference_pr_url, history):
|
378 |
"""Update persistent configuration settings."""
|
379 |
global state
|
380 |
|
381 |
-
# Update API keys
|
382 |
-
if
|
383 |
state.persistent_settings["anthropic_api_key"] = anthropic_key
|
384 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
|
386 |
if github_token:
|
387 |
os.environ["GITHUB_TOKEN"] = github_token
|
@@ -406,8 +422,11 @@ def update_persistent_config(anthropic_key, github_token, github_owner, github_r
|
|
406 |
response = "✅ Configuration saved!"
|
407 |
if github_owner and github_repo:
|
408 |
response += f" GitHub: {github_owner}/{github_repo}"
|
409 |
-
|
|
|
410 |
response += " Anthropic API key updated."
|
|
|
|
|
411 |
|
412 |
history.append(["Configuration update", response])
|
413 |
return history, "", update_status()
|
@@ -457,12 +476,20 @@ def send_message(message, history):
|
|
457 |
def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
|
458 |
# Use persistent anthropic key
|
459 |
anthropic_key = state.persistent_settings["anthropic_api_key"]
|
460 |
-
|
461 |
-
|
|
|
|
|
462 |
history.append(["Translation request", response])
|
463 |
return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
|
464 |
|
465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
# Check if file path is provided
|
468 |
if not file_to_translate or not file_to_translate.strip():
|
@@ -602,6 +629,8 @@ def restart_handler(history):
|
|
602 |
# Restore environment variables
|
603 |
if backup_settings["anthropic_api_key"]:
|
604 |
os.environ["ANTHROPIC_API_KEY"] = backup_settings["anthropic_api_key"]
|
|
|
|
|
605 |
if backup_settings["github_config"]["token"]:
|
606 |
os.environ["GITHUB_TOKEN"] = backup_settings["github_config"]["token"]
|
607 |
|
|
|
33 |
# Persistent settings (preserved across restarts)
|
34 |
self.persistent_settings = {
|
35 |
"anthropic_api_key": "",
|
36 |
+
"aws_bearer_token_bedrock": "",
|
37 |
"github_config": {
|
38 |
"token": "",
|
39 |
"owner": "",
|
|
|
375 |
return history, "", update_status(), lang
|
376 |
|
377 |
|
378 |
+
def update_persistent_config(api_provider, anthropic_key, aws_bearer_token_bedrock, github_token, github_owner, github_repo, reference_pr_url, history):
|
379 |
"""Update persistent configuration settings."""
|
380 |
global state
|
381 |
|
382 |
+
# Update API keys based on provider selection
|
383 |
+
if api_provider == "Anthropic":
|
384 |
state.persistent_settings["anthropic_api_key"] = anthropic_key
|
385 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
386 |
+
# Clear AWS Bedrock token if Anthropic is selected
|
387 |
+
state.persistent_settings["aws_bearer_token_bedrock"] = ""
|
388 |
+
os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None)
|
389 |
+
elif api_provider == "AWS Bedrock":
|
390 |
+
state.persistent_settings["aws_bearer_token_bedrock"] = aws_bearer_token_bedrock
|
391 |
+
os.environ["AWS_BEARER_TOKEN_BEDROCK"] = aws_bearer_token_bedrock
|
392 |
+
# Clear Anthropic key if AWS Bedrock is selected
|
393 |
+
state.persistent_settings["anthropic_api_key"] = ""
|
394 |
+
os.environ.pop("ANTHROPIC_API_KEY", None)
|
395 |
+
else:
|
396 |
+
# If no provider is selected or unknown, clear both
|
397 |
+
state.persistent_settings["anthropic_api_key"] = ""
|
398 |
+
os.environ.pop("ANTHROPIC_API_KEY", None)
|
399 |
+
state.persistent_settings["aws_bearer_token_bedrock"] = ""
|
400 |
+
os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None)
|
401 |
|
402 |
if github_token:
|
403 |
os.environ["GITHUB_TOKEN"] = github_token
|
|
|
422 |
response = "✅ Configuration saved!"
|
423 |
if github_owner and github_repo:
|
424 |
response += f" GitHub: {github_owner}/{github_repo}"
|
425 |
+
|
426 |
+
if api_provider == "Anthropic" and anthropic_key:
|
427 |
response += " Anthropic API key updated."
|
428 |
+
elif api_provider == "AWS Bedrock" and aws_bearer_token_bedrock:
|
429 |
+
response += " AWS Bedrock Bearer Token updated."
|
430 |
|
431 |
history.append(["Configuration update", response])
|
432 |
return history, "", update_status()
|
|
|
476 |
def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
|
477 |
# Use persistent anthropic key
|
478 |
anthropic_key = state.persistent_settings["anthropic_api_key"]
|
479 |
+
aws_bearer_token_bedrock = state.persistent_settings["aws_bearer_token_bedrock"]
|
480 |
+
|
481 |
+
if not anthropic_key and not aws_bearer_token_bedrock:
|
482 |
+
response = "❌ Please set either Anthropic API key or AWS Bearer Token for Bedrock in Configuration panel first."
|
483 |
history.append(["Translation request", response])
|
484 |
return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()
|
485 |
|
486 |
+
# Set the active API key to environment variable for translator.content.py
|
487 |
+
if anthropic_key:
|
488 |
+
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
489 |
+
os.environ.pop("AWS_BEARER_TOKEN_BEDROCK", None) # Ensure only one is active
|
490 |
+
elif aws_bearer_token_bedrock:
|
491 |
+
os.environ["AWS_BEARER_TOKEN_BEDROCK"] = aws_bearer_token_bedrock
|
492 |
+
os.environ.pop("ANTHROPIC_API_KEY", None) # Ensure only one is active
|
493 |
|
494 |
# Check if file path is provided
|
495 |
if not file_to_translate or not file_to_translate.strip():
|
|
|
629 |
# Restore environment variables
|
630 |
if backup_settings["anthropic_api_key"]:
|
631 |
os.environ["ANTHROPIC_API_KEY"] = backup_settings["anthropic_api_key"]
|
632 |
+
if backup_settings["aws_bearer_token_bedrock"]:
|
633 |
+
os.environ["AWS_BEARER_TOKEN_BEDROCK"] = backup_settings["aws_bearer_token_bedrock"]
|
634 |
if backup_settings["github_config"]["token"]:
|
635 |
os.environ["GITHUB_TOKEN"] = backup_settings["github_config"]["token"]
|
636 |
|
@@ -134,8 +134,10 @@ Korean title:"""
|
|
134 |
english_title = self.find_title_for_local(en_toctree, target_local)
|
135 |
|
136 |
if not english_title:
|
137 |
-
print(f"
|
138 |
-
|
|
|
|
|
139 |
|
140 |
print(f"Found English title: {english_title} for local: {target_local}")
|
141 |
|
@@ -153,10 +155,11 @@ Korean title:"""
|
|
153 |
)
|
154 |
|
155 |
if updated:
|
156 |
-
print(f"Successfully updated translation entry: local={target_local}, title={korean_title}")
|
157 |
return updated_toctree
|
158 |
else:
|
159 |
-
print(f"
|
|
|
160 |
return ko_toctree
|
161 |
|
162 |
except Exception as e:
|
@@ -192,8 +195,8 @@ Korean title:"""
|
|
192 |
# Get Korean toctree
|
193 |
ko_toctree = self.get_ko_toctree()
|
194 |
|
195 |
-
#
|
196 |
-
updated_ko_toctree = self.
|
197 |
|
198 |
if not updated_ko_toctree:
|
199 |
print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
|
@@ -287,3 +290,130 @@ Korean title:"""
|
|
287 |
"status": "error",
|
288 |
"message": f"Error updating toctree: {str(e)}"
|
289 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
english_title = self.find_title_for_local(en_toctree, target_local)
|
135 |
|
136 |
if not english_title:
|
137 |
+
print(f"⚠️ Toctree entry not found: '{target_local}' not in English toctree")
|
138 |
+
print(f"🔍 Attempting to find appropriate section for new entry...")
|
139 |
+
# Try to add new entry in appropriate location
|
140 |
+
return self.add_new_toctree_entry(ko_toctree, target_local)
|
141 |
|
142 |
print(f"Found English title: {english_title} for local: {target_local}")
|
143 |
|
|
|
155 |
)
|
156 |
|
157 |
if updated:
|
158 |
+
print(f"✅ Successfully updated translation entry: local={target_local}, title={korean_title}")
|
159 |
return updated_toctree
|
160 |
else:
|
161 |
+
print(f"⚠️ Toctree update skipped: '(번역중) {english_title}' entry not found")
|
162 |
+
print(f"📋 This may be a new file not yet added to Korean toctree")
|
163 |
return ko_toctree
|
164 |
|
165 |
except Exception as e:
|
|
|
195 |
# Get Korean toctree
|
196 |
ko_toctree = self.get_ko_toctree()
|
197 |
|
198 |
+
# Use diff-merge algorithm to add new entry
|
199 |
+
updated_ko_toctree = self.add_new_toctree_entry(ko_toctree, filepath_without_prefix)
|
200 |
|
201 |
if not updated_ko_toctree:
|
202 |
print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
|
|
|
290 |
"status": "error",
|
291 |
"message": f"Error updating toctree: {str(e)}"
|
292 |
}
|
293 |
+
|
294 |
+
def add_new_toctree_entry(self, ko_toctree: list, target_local: str) -> list:
|
295 |
+
"""Add new toctree entry using diff-merge algorithm"""
|
296 |
+
try:
|
297 |
+
import copy
|
298 |
+
updated_toctree = copy.deepcopy(ko_toctree)
|
299 |
+
|
300 |
+
# Generate new entry
|
301 |
+
filename = target_local.split('/')[-1].replace('_', ' ').title()
|
302 |
+
korean_title = self.translate_title(filename)
|
303 |
+
new_entry = {
|
304 |
+
'local': target_local,
|
305 |
+
'title': korean_title
|
306 |
+
}
|
307 |
+
|
308 |
+
# Get English toctree for structure reference
|
309 |
+
en_toctree = self.get_en_toctree()
|
310 |
+
|
311 |
+
# Use diff-merge algorithm
|
312 |
+
if self.merge_toctree_sections(en_toctree, updated_toctree, target_local, new_entry):
|
313 |
+
return updated_toctree
|
314 |
+
else:
|
315 |
+
# Fallback: add to root level
|
316 |
+
updated_toctree.append(new_entry)
|
317 |
+
print(f"✅ Added new entry at root level: {target_local} -> {korean_title}")
|
318 |
+
return updated_toctree
|
319 |
+
|
320 |
+
except Exception as e:
|
321 |
+
print(f"❌ Error adding new toctree entry: {e}")
|
322 |
+
return ko_toctree
|
323 |
+
|
324 |
+
def merge_toctree_sections(self, en_toctree: list, ko_toctree: list, target_local: str, new_entry: dict) -> bool:
|
325 |
+
"""Merge English toctree structure into Korean toctree for target_local"""
|
326 |
+
for en_section in en_toctree:
|
327 |
+
en_title = en_section.get('title')
|
328 |
+
|
329 |
+
# Check if this English section contains our target
|
330 |
+
if self.contains_target(en_section, target_local):
|
331 |
+
# Find matching Korean section
|
332 |
+
ko_section = self.find_matching_section(ko_toctree, en_title)
|
333 |
+
|
334 |
+
if ko_section:
|
335 |
+
# Section exists - merge subsections
|
336 |
+
return self.merge_subsections(en_section, ko_section, target_local, new_entry)
|
337 |
+
else:
|
338 |
+
# Section doesn't exist - create new section
|
339 |
+
new_ko_section = self.create_section_with_order(en_section, target_local, new_entry)
|
340 |
+
ko_toctree.append(new_ko_section)
|
341 |
+
print(f"✅ Created new section '{new_ko_section.get('title')}' with ordered structure")
|
342 |
+
return True
|
343 |
+
return False
|
344 |
+
|
345 |
+
def contains_target(self, section: dict, target_local: str) -> bool:
    """Return True if ``target_local`` appears anywhere below ``section``.

    Walks the ``sections`` list of ``section`` and of every nested
    subsection, comparing each entry's ``local`` value with
    ``target_local``.  The ``local`` of ``section`` itself is not examined,
    only its descendants.

    Args:
        section: A toctree node (dict) that may carry a ``sections`` list.
        target_local: The ``local`` path to search for.

    Returns:
        True when some descendant entry has ``local == target_local``,
        otherwise False.
    """
    # Explicit stack instead of recursion: same traversal result, no
    # dependency on call depth.
    pending = [section]
    while pending:
        node = pending.pop()
        # A `sections` key that is present but empty may hold None rather
        # than a list; treat that the same as "no children".
        children = node.get('sections') if isinstance(node, dict) else None
        for child in children or []:
            if not isinstance(child, dict):
                continue  # ignore malformed entries instead of crashing
            if child.get('local') == target_local:
                return True
            pending.append(child)
    return False
|
354 |
+
|
355 |
+
def find_matching_section(self, ko_toctree: list, en_title: str) -> "dict | None":
    """Find the Korean top-level section corresponding to an English title.

    Two attempts are made, in order:

    1. an entry whose ``title`` equals ``en_title`` verbatim (section not
       yet translated);
    2. an entry whose ``title`` equals ``self.translate_title(en_title)``.

    Args:
        ko_toctree: List of top-level Korean toctree entries (dicts).
        en_title: Title of the English section to match.

    Returns:
        The matching entry from ``ko_toctree`` (the same object, not a
        copy), or None when nothing matches, ``en_title`` is empty, or the
        title translation fails.
    """
    # Without a title there is nothing meaningful to match on; comparing
    # against None would "match" any entry that simply lacks a title.
    if not en_title:
        return None

    # Try exact match first
    for item in ko_toctree:
        if item.get('title') == en_title:
            return item

    # Try translated title match.  Translation failure is non-fatal (the
    # caller falls back to creating a new section), but it is reported
    # rather than silently swallowed.
    try:
        translated_title = self.translate_title(en_title)
    except Exception as e:
        print(f"⚠️ Could not translate section title '{en_title}': {e}")
        return None

    for item in ko_toctree:
        if item.get('title') == translated_title:
            return item

    return None
|
372 |
+
|
373 |
+
def merge_subsections(self, en_section: dict, ko_section: dict, target_local: str, new_entry: dict) -> bool:
    """Place ``new_entry`` in ``ko_section`` at the position used in English.

    Only the direct children of ``en_section`` are searched for
    ``target_local``; entries nested deeper are not located here.

    Args:
        en_section: English section whose ``sections`` list defines order.
        ko_section: Korean section to mutate in place.
        target_local: ``local`` value of the entry being added.
        new_entry: The Korean entry (dict) to insert.

    Returns:
        True if ``new_entry`` was handed to ``insert_at_correct_position``;
        False if ``en_section`` has no subsections or ``target_local`` is
        not one of its direct children.
    """
    # `sections` may be missing, empty, or None; none of these can contain
    # the target.
    en_children = en_section.get('sections')
    if not en_children:
        return False

    # Find target index in English sections
    target_index = next(
        (
            i
            for i, en_subsection in enumerate(en_children)
            if en_subsection.get('local') == target_local
        ),
        -1,
    )
    if target_index == -1:
        return False

    # Ensure Korean section has a usable sections list (covers both a
    # missing key and an explicit None value).
    if ko_section.get('sections') is None:
        ko_section['sections'] = []

    # Insert at correct position
    self.insert_at_correct_position(ko_section, target_index, new_entry)
    return True
|
395 |
+
|
396 |
+
def insert_at_correct_position(self, ko_section: dict, target_index: int, new_entry: dict):
    """Insert ``new_entry`` into ``ko_section['sections']`` at ``target_index``.

    The entry is inserted, shifting later entries right, so an existing
    Korean entry at that index is never overwritten.  When the Korean list
    is shorter than ``target_index`` the entry is appended at the end; no
    ``None`` placeholders are written into the list.

    Args:
        ko_section: Korean section dict; must already contain a
            ``sections`` list, which is mutated in place.
        target_index: Index of the entry within the English section.
        new_entry: The Korean entry (dict) to insert.
    """
    sections = ko_section['sections']

    # Clamp into [0, len(sections)] so a short Korean list gets an append
    # rather than padding holes.
    position = max(0, min(target_index, len(sections)))
    sections.insert(position, new_entry)
|
410 |
+
|
411 |
+
def create_section_with_order(self, en_section: dict, target_local: str, new_entry: dict) -> dict:
|
412 |
+
"""Create new Korean section with only the translated entry"""
|
413 |
+
new_ko_section = {
|
414 |
+
'title': self.translate_title(en_section.get('title')),
|
415 |
+
'isExpanded': en_section.get('isExpanded', False),
|
416 |
+
'sections': [new_entry] # Only add the translated entry
|
417 |
+
}
|
418 |
+
|
419 |
+
return new_ko_section
|
@@ -261,30 +261,58 @@ def generate_github_pr(
|
|
261 |
{result["message"]}"""
|
262 |
|
263 |
elif result["status"] == "partial_success":
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
|
|
|
|
|
|
|
266 |
🌿 **Branch:** {result["branch"]}
|
267 |
📁 **File:** {result["file_path"]}{toctree_status}
|
268 |
|
269 |
{result["message"]}
|
270 |
|
271 |
**Error Details:**
|
272 |
-
{
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
else:
|
275 |
error_details = result.get("error_details", "No additional details")
|
276 |
return f"""❌ **GitHub PR Creation Failed**
|
277 |
|
|
|
|
|
|
|
|
|
278 |
**Error Message:**
|
279 |
{result["message"]}
|
280 |
|
281 |
**Error Details:**
|
282 |
{error_details}
|
283 |
|
284 |
-
💡 **
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
|
289 |
except Exception as e:
|
290 |
error_msg = f"""❌ **Unexpected Error During PR Creation**
|
|
|
261 |
{result["message"]}"""
|
262 |
|
263 |
elif result["status"] == "partial_success":
|
264 |
+
error_details = result.get("error_details", "Unknown error")
|
265 |
+
|
266 |
+
# Check if it's "existing PR" case (not really an error)
|
267 |
+
if "Existing PR found" in error_details:
|
268 |
+
existing_pr_url = error_details.split(": ")[-1] if ": " in error_details else "Unknown"
|
269 |
+
return f"""🔄 **Translation Updated Successfully**
|
270 |
+
|
271 |
+
🎯 **Selected Project:** {project}
|
272 |
+
🌿 **Branch:** {result["branch"]}
|
273 |
+
📁 **File:** {result["file_path"]}{toctree_status}
|
274 |
+
|
275 |
+
🔗 **Existing PR Updated:** {existing_pr_url}
|
276 |
+
|
277 |
+
✅ Your translation has been added to the existing PR. The file and toctree have been successfully updated!"""
|
278 |
+
else:
|
279 |
+
# Actual error case
|
280 |
+
return f"""⚠️ **Partial Success**
|
281 |
|
282 |
+
🎯 **Selected Project:** {project}
|
283 |
+
🏠 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
|
284 |
+
🎯 **Target Base:** {base_owner}/{base_repo}
|
285 |
🌿 **Branch:** {result["branch"]}
|
286 |
📁 **File:** {result["file_path"]}{toctree_status}
|
287 |
|
288 |
{result["message"]}
|
289 |
|
290 |
**Error Details:**
|
291 |
+
{error_details}
|
292 |
+
|
293 |
+
💡 **Project-Repository Mismatch Check:**
|
294 |
+
- Selected project '{project}' should match repository '{github_config.get('repo_name', 'REPO')}'
|
295 |
+
- For smolagents: use Jwaminju/smolagents fork
|
296 |
+
- For transformers: use Jwaminju/transformers fork"""
|
297 |
|
298 |
else:
|
299 |
error_details = result.get("error_details", "No additional details")
|
300 |
return f"""❌ **GitHub PR Creation Failed**
|
301 |
|
302 |
+
🎯 **Selected Project:** {project}
|
303 |
+
🏠 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
|
304 |
+
🎯 **Target Base:** {base_owner}/{base_repo}
|
305 |
+
|
306 |
**Error Message:**
|
307 |
{result["message"]}
|
308 |
|
309 |
**Error Details:**
|
310 |
{error_details}
|
311 |
|
312 |
+
💡 **Project-Repository Mismatch:**
|
313 |
+
Selected project '{project}' but configured repository '{github_config.get('repo_name', 'REPO')}'
|
314 |
+
• For smolagents project: use 'smolagents' repository
|
315 |
+
• For transformers project: use 'transformers' repository"""
|
316 |
|
317 |
except Exception as e:
|
318 |
error_msg = f"""❌ **Unexpected Error During PR Creation**
|
@@ -138,10 +138,23 @@ with gr.Blocks(
|
|
138 |
gr.Markdown("### ⚙️ Configuration")
|
139 |
|
140 |
with gr.Accordion("🔧 API & GitHub Settings", open=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
config_anthropic_key = gr.Textbox(
|
142 |
label="🔑 Anthropic API Key",
|
143 |
type="password",
|
144 |
placeholder="sk-ant-...",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
)
|
146 |
config_github_token = gr.Textbox(
|
147 |
label="🔑 GitHub Token (Required for PR, Optional for file search)",
|
@@ -316,10 +329,20 @@ with gr.Blocks(
|
|
316 |
# Configuration Save
|
317 |
save_config_btn.click(
|
318 |
fn=update_persistent_config,
|
319 |
-
inputs=[config_anthropic_key, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
|
320 |
outputs=[chatbot, msg_input, status_display],
|
321 |
)
|
322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
approve_btn.click(
|
324 |
fn=approve_handler,
|
325 |
inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
|
|
|
138 |
gr.Markdown("### ⚙️ Configuration")
|
139 |
|
140 |
with gr.Accordion("🔧 API & GitHub Settings", open=True):
|
141 |
+
api_provider_radio = gr.Radio(
|
142 |
+
["Anthropic", "AWS Bedrock"],
|
143 |
+
label="Select API Provider",
|
144 |
+
value="Anthropic", # Default selection
|
145 |
+
interactive=True,
|
146 |
+
)
|
147 |
config_anthropic_key = gr.Textbox(
|
148 |
label="🔑 Anthropic API Key",
|
149 |
type="password",
|
150 |
placeholder="sk-ant-...",
|
151 |
+
visible=True, # Initially visible as Anthropic is default
|
152 |
+
)
|
153 |
+
config_aws_bearer_token_bedrock = gr.Textbox(
|
154 |
+
label="🔑 AWS Bearer Token for Bedrock",
|
155 |
+
type="password",
|
156 |
+
placeholder="AWS_BEARER_TOKEN_BEDROCK",
|
157 |
+
visible=False, # Initially hidden
|
158 |
)
|
159 |
config_github_token = gr.Textbox(
|
160 |
label="🔑 GitHub Token (Required for PR, Optional for file search)",
|
|
|
329 |
# Configuration Save
|
330 |
save_config_btn.click(
|
331 |
fn=update_persistent_config,
|
332 |
+
inputs=[api_provider_radio, config_anthropic_key, config_aws_bearer_token_bedrock, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
|
333 |
outputs=[chatbot, msg_input, status_display],
|
334 |
)
|
335 |
|
336 |
+
# API Provider selection handler
|
337 |
+
api_provider_radio.change(
|
338 |
+
fn=lambda provider: (
|
339 |
+
gr.update(visible=True) if provider == "Anthropic" else gr.update(visible=False),
|
340 |
+
gr.update(visible=True) if provider == "AWS Bedrock" else gr.update(visible=False),
|
341 |
+
),
|
342 |
+
inputs=[api_provider_radio],
|
343 |
+
outputs=[config_anthropic_key, config_aws_bearer_token_bedrock],
|
344 |
+
)
|
345 |
+
|
346 |
approve_btn.click(
|
347 |
fn=approve_handler,
|
348 |
inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# config.py
|
2 |
+
|
3 |
+
# 기본 모델 목록
|
4 |
+
default_models = [
|
5 |
+
"Helsinki-NLP/opus-mt-ko-en",
|
6 |
+
"Helsinki-NLP/opus-mt-tc-big-en-ko",
|
7 |
+
"davidkim205/iris-7b",
|
8 |
+
"maywell/Synatra-7B-v0.3-Translation",
|
9 |
+
"CUSTOM_MODEL_INPUT" # Placeholder for custom model input
|
10 |
+
]
|
@@ -11,6 +11,7 @@ from typing import Optional, Dict, List, Tuple, Any
|
|
11 |
|
12 |
# Load environment variables from .env file
|
13 |
from dotenv import load_dotenv
|
|
|
14 |
|
15 |
load_dotenv()
|
16 |
|
@@ -259,8 +260,8 @@ class GitHubPRAgent:
|
|
259 |
) -> str:
|
260 |
"""Generate text using LLM."""
|
261 |
try:
|
262 |
-
|
263 |
-
generated =
|
264 |
print(f"LLM generated {operation}: {generated}")
|
265 |
return generated
|
266 |
except Exception as e:
|
@@ -271,25 +272,10 @@ class GitHubPRAgent:
|
|
271 |
def generate_branch_name_from_reference(
|
272 |
self, reference_branch_name: str, target_language: str, file_name: str
|
273 |
) -> str:
|
274 |
-
"""
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
Now I need to generate a branch name for a new translation task:
|
280 |
-
- Target language: {target_language}
|
281 |
-
- File to translate: {file_name}
|
282 |
-
|
283 |
-
Please analyze the pattern and style of the reference PR title to generate a consistent new branch name.
|
284 |
-
|
285 |
-
Requirements:
|
286 |
-
1. Follow the naming conventions and patterns of the reference PR
|
287 |
-
2. Appropriately reflect the target language ({target_language}) and file name ({file_name}) if applicable
|
288 |
-
|
289 |
-
Please return only the branch name. No other explanation is needed."""
|
290 |
-
|
291 |
-
fallback = f"translate-{target_language}-{file_name.replace('_', '-')}"
|
292 |
-
return self._generate_with_llm(prompt, fallback, "branch name")
|
293 |
|
294 |
def generate_pr_content_from_reference(
|
295 |
self,
|
@@ -328,8 +314,8 @@ Title: [PR title here]
|
|
328 |
Body: [PR body here, maintaining the exact markdown format and structure of the original]"""
|
329 |
|
330 |
try:
|
331 |
-
|
332 |
-
generated_content =
|
333 |
|
334 |
# Separate title and body from response
|
335 |
lines = generated_content.split("\n")
|
@@ -370,47 +356,46 @@ Body: [PR body here, maintaining the exact markdown format and structure of the
|
|
370 |
self, target_language: str, filepath: str, target_filepath: str, file_name: str
|
371 |
) -> Tuple[str, str]:
|
372 |
"""Generate default PR content."""
|
373 |
-
title = f"[i18n-{target_language}]
|
374 |
-
body = f"""
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
"""
|
381 |
return title, body
|
382 |
|
383 |
def generate_commit_message_from_reference(
|
384 |
self, commit_messages: List[str], target_language: str, file_name: str
|
385 |
) -> str:
|
386 |
-
"""
|
387 |
-
|
388 |
-
"\n".join([f"- {msg}" for msg in commit_messages])
|
389 |
-
if commit_messages
|
390 |
-
else "None"
|
391 |
-
)
|
392 |
-
|
393 |
-
prompt = f"""Here are the commit messages from the reference PR:
|
394 |
-
|
395 |
-
{commits_text}
|
396 |
-
|
397 |
-
Now I need to generate a commit message for a new translation task:
|
398 |
-
- Target language: {target_language}
|
399 |
-
- File to translate: {file_name}
|
400 |
-
|
401 |
-
Please analyze the commit message patterns and style of the reference PR to generate a consistent new commit message.
|
402 |
-
|
403 |
-
Requirements:
|
404 |
-
1. Follow the commit message style and format of the reference PR
|
405 |
-
2. Appropriately reflect the target language ({target_language}) and file name ({file_name})
|
406 |
-
3. Follow general Git commit message conventions
|
407 |
-
4. Be concise and clear
|
408 |
-
5. If you detect typos in the given commit messages, use corrected versions (e.g., dos -> docs)
|
409 |
-
|
410 |
-
Please return only the commit message. No other explanation is needed."""
|
411 |
-
|
412 |
-
fallback = f"docs: add {target_language} translation for {file_name}"
|
413 |
-
return self._generate_with_llm(prompt, fallback, "commit message")
|
414 |
|
415 |
def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
|
416 |
"""Get information about an existing branch."""
|
@@ -452,7 +437,7 @@ Please return only the commit message. No other explanation is needed."""
|
|
452 |
|
453 |
# 2. Generate translation file path and branch name
|
454 |
target_filepath = filepath.replace("/en/", f"/{target_language}/")
|
455 |
-
file_name = filepath.split("/")[-1].
|
456 |
|
457 |
print(f"🌿 Generating branch name...")
|
458 |
branch_name = self.generate_branch_name_from_reference(
|
|
|
11 |
|
12 |
# Load environment variables from .env file
|
13 |
from dotenv import load_dotenv
|
14 |
+
from translator.content import llm_translate
|
15 |
|
16 |
load_dotenv()
|
17 |
|
|
|
260 |
) -> str:
|
261 |
"""Generate text using LLM."""
|
262 |
try:
|
263 |
+
_usage_info, generated = llm_translate(prompt)
|
264 |
+
generated = generated.strip()
|
265 |
print(f"LLM generated {operation}: {generated}")
|
266 |
return generated
|
267 |
except Exception as e:
|
|
|
272 |
def generate_branch_name_from_reference(
|
273 |
self, reference_branch_name: str, target_language: str, file_name: str
|
274 |
) -> str:
|
275 |
+
"""Generate branch name using simple template."""
|
276 |
+
# Keep .md extension and make branch-safe
|
277 |
+
branch_safe_name = file_name.replace('_', '-')
|
278 |
+
return f"{target_language}-{branch_safe_name}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
|
280 |
def generate_pr_content_from_reference(
|
281 |
self,
|
|
|
314 |
Body: [PR body here, maintaining the exact markdown format and structure of the original]"""
|
315 |
|
316 |
try:
|
317 |
+
_usage_info, generated_content = llm_translate(prompt)
|
318 |
+
generated_content = generated_content.strip()
|
319 |
|
320 |
# Separate title and body from response
|
321 |
lines = generated_content.split("\n")
|
|
|
356 |
self, target_language: str, filepath: str, target_filepath: str, file_name: str
|
357 |
) -> Tuple[str, str]:
|
358 |
"""Generate default PR content."""
|
359 |
+
title = f"🌐 [i18n-{target_language}] Translated `{file_name}` to {target_language}"
|
360 |
+
body = f"""# What does this PR do?
|
361 |
+
|
362 |
+
Translated the `{filepath}` file of the documentation to {target_language} 😄
|
363 |
+
Thank you in advance for your review!
|
364 |
+
|
365 |
+
Part of https://github.com/huggingface/transformers/issues/20179
|
366 |
+
|
367 |
+
## Before reviewing
|
368 |
+
- [x] Check for missing / redundant translations (번역 누락/중복 검사)
|
369 |
+
- [x] Grammar Check (맞춤법 검사)
|
370 |
+
- [x] Review or Add new terms to glossary (용어 확인 및 추가)
|
371 |
+
- [x] Check Inline TOC (e.g. `[[lowercased-header]]`)
|
372 |
+
- [x] Check live-preview for gotchas (live-preview로 정상작동 확인)
|
373 |
+
|
374 |
+
## Who can review? (Initial)
|
375 |
+
{target_language} translation reviewers
|
376 |
+
|
377 |
+
## Before submitting
|
378 |
+
- [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
|
379 |
+
- [x] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
|
380 |
+
Pull Request section?
|
381 |
+
- [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link
|
382 |
+
to it if that's the case.
|
383 |
+
- [x] Did you make sure to update the documentation with your changes? Here are the
|
384 |
+
[documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and
|
385 |
+
[here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
|
386 |
+
- [ ] Did you write any new necessary tests?
|
387 |
+
|
388 |
+
## Who can review? (Final)
|
389 |
+
May you please review this PR?
|
390 |
+
Documentation maintainers
|
391 |
"""
|
392 |
return title, body
|
393 |
|
394 |
def generate_commit_message_from_reference(
|
395 |
self, commit_messages: List[str], target_language: str, file_name: str
|
396 |
) -> str:
|
397 |
+
"""Generate simple commit message using template."""
|
398 |
+
return f"docs: {target_language}: {file_name}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
|
401 |
"""Get information about an existing branch."""
|
|
|
437 |
|
438 |
# 2. Generate translation file path and branch name
|
439 |
target_filepath = filepath.replace("/en/", f"/{target_language}/")
|
440 |
+
file_name = filepath.split("/")[-1] # Keep .md extension
|
441 |
|
442 |
print(f"🌿 Generating branch name...")
|
443 |
branch_name = self.generate_branch_name_from_reference(
|
@@ -1,9 +1,12 @@
|
|
|
|
1 |
import re
|
2 |
import string
|
3 |
|
4 |
import requests
|
5 |
from langchain.callbacks import get_openai_callback
|
6 |
from langchain_anthropic import ChatAnthropic
|
|
|
|
|
7 |
|
8 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
9 |
from translator.project_config import get_project_config
|
@@ -167,10 +170,45 @@ def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
|
|
167 |
|
168 |
|
169 |
def llm_translate(to_translate: str) -> tuple[str, str]:
|
170 |
-
|
|
|
|
|
|
|
|
|
171 |
model = ChatAnthropic(
|
172 |
model="claude-sonnet-4-20250514", max_tokens=64000, streaming=True
|
173 |
)
|
174 |
ai_message = model.invoke(to_translate)
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import re
|
3 |
import string
|
4 |
|
5 |
import requests
|
6 |
from langchain.callbacks import get_openai_callback
|
7 |
from langchain_anthropic import ChatAnthropic
|
8 |
+
import boto3
|
9 |
+
import json
|
10 |
|
11 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
12 |
from translator.project_config import get_project_config
|
|
|
170 |
|
171 |
|
172 |
def llm_translate(to_translate: str) -> tuple[str, str]:
    """Send ``to_translate`` to the configured LLM backend and return its reply.

    The backend is chosen from environment variables, checked in this order:

    1. ``ANTHROPIC_API_KEY`` set: Anthropic API via ``ChatAnthropic``.
    2. ``AWS_BEARER_TOKEN_BEDROCK`` set: AWS Bedrock via ``boto3``.

    Args:
        to_translate: The complete prompt text sent as a single user message.

    Returns:
        A ``(usage_info, text)`` tuple.  ``usage_info`` is a fixed marker
        string for the Anthropic branch and the stringified ``usage`` field
        of the response for the Bedrock branch; ``text`` is the model output.

    Raises:
        ValueError: If neither environment variable is set.
    """
    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
    aws_bearer_token_bedrock = os.environ.get("AWS_BEARER_TOKEN_BEDROCK")

    if anthropic_api_key:
        # Use Anthropic API Key (ChatAnthropic is constructed without an
        # explicit key here, so it must obtain it on its own).
        model = ChatAnthropic(
            model="claude-sonnet-4-20250514", max_tokens=64000, streaming=True
        )
        ai_message = model.invoke(to_translate)
        content = ai_message.content
        # Message content may be a list of parts rather than a plain string;
        # flatten it so callers that call str methods (e.g. .strip()) work.
        if not isinstance(content, str):
            content = "".join(
                part if isinstance(part, str) else part.get("text", "")
                for part in content
            )
        return "Anthropic API Key used", content

    if aws_bearer_token_bedrock:
        # The token is not passed to boto3 explicitly; it only selects this
        # branch.
        # NOTE(review): recent boto3/botocore releases document
        # AWS_BEARER_TOKEN_BEDROCK for Bedrock API-key auth, while older
        # releases fall back to the standard AWS credential chain — confirm
        # against the pinned boto3 version.
        client = boto3.client("bedrock-runtime", region_name="eu-north-1")

        body = {
            "messages": [
                {"role": "user", "content": to_translate}
            ],
            # Same output cap as the Anthropic branch.  The previous value
            # of 128000 was sent without any extended-output beta flag.
            # NOTE(review): 128K presumably exceeds this model's default
            # output limit — confirm against the Bedrock model docs.
            "max_tokens": 64000,
            "anthropic_version": "bedrock-2023-05-31"
        }

        response = client.invoke_model(
            modelId="arn:aws:bedrock:eu-north-1:235729104418:inference-profile/eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
            contentType="application/json",
            accept="application/json",
            body=json.dumps(body),
        )
        result = json.loads(response["body"].read())
        usage = result["usage"]
        text = result["content"][0]["text"]

        return str(usage), text

    raise ValueError("No API key found for translation. Please set ANTHROPIC_API_KEY or AWS_BEARER_TOKEN_BEDROCK environment variable.")
|