Upload 7 files
Browse files- README.md.txt +39 -0
- config.json +9 -0
- inference.py +16 -0
- model_card.yaml.txt +48 -0
- pytorch_model.bin +3 -0
- tokenizer.json +4 -0
- vocab.txt +3 -0
README.md.txt
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
- te
|
5 |
+
tags:
|
6 |
+
- translation
|
7 |
+
- machine-translation
|
8 |
+
- NLP
|
9 |
+
- pytorch
|
10 |
+
license: "MIT"
|
11 |
+
datasets:
|
12 |
+
- hima06varshini/english-telugu-parallel-corpus
|
13 |
+
widget:
|
14 |
+
- text: "Hello, how are you?"
|
15 |
+
---
|
16 |
+
|
17 |
+
# 🏆 English-to-Telugu Translation Model
|
18 |
+
|
19 |
+
This model translates **English** text into **Telugu** using a Transformer-based approach.
|
20 |
+
|
21 |
+
## 📂 Model Details
|
22 |
+
- **Model Name**: `hima06varshini/english-to-telugu-translation`
|
23 |
+
- **Developed by**: Y. Himavarshini
|
24 |
+
- **License**: MIT License
|
25 |
+
|
26 |
+
## 🚀 Usage
|
27 |
+
```python
|
28 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
29 |
+
|
30 |
+
model_name = "hima06varshini/english-to-telugu-translation"
|
31 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
32 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
33 |
+
|
34 |
+
def translate(text):
|
35 |
+
inputs = tokenizer(text, return_tensors="pt")
|
36 |
+
outputs = model.generate(**inputs)
|
37 |
+
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
38 |
+
|
39 |
+
print(translate("Hello, how are you?"))
|
config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_type": "transformer",
|
3 |
+
"num_layers": 6,
|
4 |
+
"hidden_size": 512,
|
5 |
+
"vocab_size": 32000,
|
6 |
+
"max_position_embeddings": 512,
|
7 |
+
"attention_heads": 8,
|
8 |
+
"dropout_rate": 0.1
|
9 |
+
}
|
inference.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load the pretrained English->Telugu checkpoint and its tokenizer from the
# Hugging Face Hub (downloaded on first run, cached locally afterwards).
model_name = "hima06varshini/english-to-telugu-translation"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def translate(text):
    """Return the model's Telugu translation of the English string *text*."""
    encoded = tokenizer(text, return_tensors="pt")
    generated = model.generate(**encoded)
    # batch_decode yields one string per generated sequence; a single input
    # produces a single sequence, so the first entry is the translation.
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]


# Example usage
text = "Hello, how are you?"
translation = translate(text)
print("Translated text:", translation)
|
model_card.yaml.txt
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name: "english-to-telugu-translation"
|
2 |
+
tags:
|
3 |
+
- translation
|
4 |
+
- machine-translation
|
5 |
+
- english
|
6 |
+
- telugu
|
7 |
+
library_name: "transformers"
|
8 |
+
license: "MIT"
|
9 |
+
language:
|
10 |
+
- en
|
11 |
+
- te
|
12 |
+
metrics:
|
13 |
+
- "BLEU"
|
14 |
+
- "ROUGE"
|
15 |
+
- "METEOR"
|
16 |
+
model_description: |
|
17 |
+
This model is trained for English-to-Telugu translation using a Transformer-based architecture.
|
18 |
+
It is fine-tuned on a large parallel dataset of English and Telugu sentences.
|
19 |
+
|
20 |
+
source: "Trained on the English-Telugu Parallel Corpus dataset."
|
21 |
+
citation: |
|
22 |
+
If you use this model, please cite:
|
23 |
+
"English-to-Telugu Translation Model, created by Y. Himavarshini, 2025"
|
24 |
+
|
25 |
+
model_creators:
|
26 |
+
- "Y. Himavarshini"
|
27 |
+
- "Gates Institute of Technology"
|
28 |
+
|
29 |
+
task_categories:
|
30 |
+
- "Machine Translation"
|
31 |
+
|
32 |
+
task_ids:
|
33 |
+
- "translation"
|
34 |
+
|
35 |
+
pretty_name: "English-to-Telugu Translation Model"
|
36 |
+
|
37 |
+
model_config:
|
38 |
+
architecture: "Transformer"
|
39 |
+
num_layers: 6
|
40 |
+
hidden_size: 512
|
41 |
+
vocab_size: 32000
|
42 |
+
|
43 |
+
datasets:
|
44 |
+
- "hima06varshini/english-telugu-parallel-corpus"
|
45 |
+
|
46 |
+
inference_example:
|
47 |
+
- input: "Hello, how are you?"
|
48 |
+
output: "హలో, మీరు ఎలా ఉన్నారు?"
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:734b5556bf566636ee967fca37c75cc40dc3c6ea048cc6ea89e0f496f07fcbad
|
3 |
+
size 109
|
tokenizer.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# NOTE(review): this content lives in a file named "tokenizer.json", but it is
# a Python snippet, not serialized tokenizer JSON — presumably uploaded by
# mistake. It is a one-off helper that downloads a tokenizer and saves it.
from transformers import AutoTokenizer

# "your-model-name" is a placeholder — replace with a real Hub repo id
# (e.g. the model this repo belongs to) before running.
tokenizer = AutoTokenizer.from_pretrained("your-model-name")
# save_pretrained writes the tokenizer files (tokenizer.json, vocab, config)
# into the "model_directory" folder.
tokenizer.save_pretrained("model_directory")
|
vocab.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# NOTE(review): this content lives in a file named "vocab.txt" but is Python
# code, not a vocabulary listing — presumably uploaded by mistake.
# NOTE(review): `tokenizer` is NOT defined in this snippet; run standalone it
# raises NameError. A loaded Hugging Face tokenizer must already be in scope.
with open("vocab.txt", "w") as f:
    # get_vocab() maps token string -> id; only the token strings are written,
    # one per line, in whatever order get_vocab() yields them (not guaranteed
    # to be sorted by id).
    for token in tokenizer.get_vocab().keys():
        f.write(token + "\n")
|