Update README.md
Browse files
README.md
CHANGED
@@ -40,80 +40,51 @@ tags:
|
|
40 |
```python
|
41 |
from transformers import MarianMTModel, MarianTokenizer
|
42 |
|
43 |
-
#
|
44 |
model_path = "trained_model"
|
45 |
|
46 |
-
#
|
47 |
tokenizer = MarianTokenizer.from_pretrained(model_path)
|
48 |
model = MarianMTModel.from_pretrained(model_path)
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
"
|
54 |
-
"bos_Latn": "bos_Latn",
|
55 |
-
"bul": "bul",
|
56 |
-
"bul_Latn": "bul_Latn",
|
57 |
-
"ces": "ces",
|
58 |
-
"dsb": "dsb",
|
59 |
-
"hrv": "hrv",
|
60 |
-
"hsb": "hsb",
|
61 |
-
"mkd": "mkd",
|
62 |
-
"orv_Cyrl": "orv_Cyrl",
|
63 |
-
"pol": "pol",
|
64 |
-
"rus": "rus",
|
65 |
-
"slv": "slv",
|
66 |
-
"srp_Cyrl": "srp_Cyrl",
|
67 |
-
"srp_Latn": "srp_Latn",
|
68 |
-
"ukr": "ukr"
|
69 |
-
}
|
70 |
-
|
71 |
-
# Функция для перевода с несколькими вариантами
|
72 |
-
def translate_text(source_text, target_language, num_translations=3):
|
73 |
-
if target_language not in language_map:
|
74 |
-
print("Неверное направление перевода. Попробуйте снова.")
|
75 |
-
return None
|
76 |
-
|
77 |
-
# Формируем текст с маркером целевого языка
|
78 |
-
language_token = f">>{target_language}<< "
|
79 |
-
text_with_token = language_token + source_text
|
80 |
|
81 |
-
#
|
82 |
inputs = tokenizer(text_with_token, return_tensors="pt")
|
83 |
|
84 |
-
#
|
85 |
translated_tokens = model.generate(
|
86 |
**inputs,
|
87 |
-
num_return_sequences=num_translations, #
|
88 |
-
num_beams=num_translations #
|
89 |
)
|
90 |
|
91 |
-
#
|
92 |
translations = [tokenizer.decode(tokens, skip_special_tokens=True) for tokens in translated_tokens]
|
93 |
return translations
|
94 |
|
95 |
-
#
|
96 |
-
print("
|
97 |
|
98 |
while True:
|
99 |
-
#
|
100 |
-
source_text = input("
|
101 |
|
102 |
-
#
|
103 |
if source_text == "!q":
|
104 |
-
print("
|
105 |
break
|
106 |
|
107 |
-
#
|
108 |
-
|
109 |
-
|
110 |
-
# Перевод фразы с несколькими вариантами
|
111 |
-
translations = translate_text(source_text, target_language)
|
112 |
|
113 |
if translations:
|
114 |
-
#
|
115 |
for idx, translation in enumerate(translations, 1):
|
116 |
-
print(f"
|
117 |
|
118 |
```
|
119 |
|
|
|
40 |
```python
|
41 |
from transformers import MarianMTModel, MarianTokenizer
|
42 |
|
43 |
+
# Single path used to load both the model and the tokenizer
|
44 |
model_path = "trained_model"
|
45 |
|
46 |
+
# Load the model and tokenizer
|
47 |
tokenizer = MarianTokenizer.from_pretrained(model_path)
|
48 |
model = MarianMTModel.from_pretrained(model_path)
|
49 |
|
50 |
+
# Translate text into Russian (fixed target language), returning several candidate translations
|
51 |
+
def translate_text(source_text, num_translations=3):
|
52 |
+
# Prepend the fixed target-language token for Russian
|
53 |
+
text_with_token = ">>rus<< " + source_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
+
# Tokenize the input text
|
56 |
inputs = tokenizer(text_with_token, return_tensors="pt")
|
57 |
|
58 |
+
# Generate translations with multiple variants
|
59 |
translated_tokens = model.generate(
|
60 |
**inputs,
|
61 |
+
num_return_sequences=num_translations, # Number of translation variants
|
62 |
+
num_beams=num_translations # Beam width; must be at least num_return_sequences
|
63 |
)
|
64 |
|
65 |
+
# Decode the translated tokens into readable text
|
66 |
translations = [tokenizer.decode(tokens, skip_special_tokens=True) for tokens in translated_tokens]
|
67 |
return translations
|
68 |
|
69 |
+
# Main loop for text input and translation output
|
70 |
+
print("Enter a phrase to translate or !q to quit.")
|
71 |
|
72 |
while True:
|
73 |
+
# Get input phrase from the user
|
74 |
+
source_text = input("Enter a phrase: ")
|
75 |
|
76 |
+
# Check for the quit command
|
77 |
if source_text == "!q":
|
78 |
+
print("Exiting the program.")
|
79 |
break
|
80 |
|
81 |
+
# Translate the phrase with multiple variants
|
82 |
+
translations = translate_text(source_text)
|
|
|
|
|
|
|
83 |
|
84 |
if translations:
|
85 |
+
# Output all translation variants
|
86 |
for idx, translation in enumerate(translations, 1):
|
87 |
+
print(f"Variant {idx}: {translation}")
|
88 |
|
89 |
```
|
90 |
|