hima06varshini commited on
Commit
9e1f23b
·
verified ·
1 Parent(s): f23c752

Upload 7 files

Browse files
Files changed (7) hide show
  1. README.md.txt +39 -0
  2. config.json +9 -0
  3. inference.py +16 -0
  4. model_card.yaml.txt +48 -0
  5. pytorch_model.bin +3 -0
  6. tokenizer.json +4 -0
  7. vocab.txt +3 -0
README.md.txt ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ - te
5
+ tags:
6
+ - translation
7
+ - machine-translation
8
+ - NLP
9
+ - pytorch
10
+ license: "MIT"
11
+ datasets:
12
+ - hima06varshini/english-telugu-parallel-corpus
13
+ widget:
14
+ - text: "Hello, how are you?"
15
+ ---
16
+
17
+ # 🏆 English-to-Telugu Translation Model
18
+
19
+ This model translates **English** text into **Telugu** using a Transformer-based approach.
20
+
21
+ ## 📂 Model Details
22
+ - **Model Name**: `hima06varshini/english-to-telugu-translation`
23
+ - **Developed by**: Y. Himavarshini
24
+ - **License**: MIT License
25
+
26
+ ## 🚀 Usage
27
+ ```python
28
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
29
+
30
+ model_name = "hima06varshini/english-to-telugu-translation"
31
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
32
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
33
+
34
+ def translate(text):
35
+ inputs = tokenizer(text, return_tensors="pt")
36
+ outputs = model.generate(**inputs)
37
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
38
+
39
+ print(translate("Hello, how are you?"))
+ ```
config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "transformer",
3
+ "num_layers": 6,
4
+ "hidden_size": 512,
5
+ "vocab_size": 32000,
6
+ "max_position_embeddings": 512,
7
+ "attention_heads": 8,
8
+ "dropout_rate": 0.1
9
+ }
inference.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Inference script for the English-to-Telugu translation model.

Loads the pretrained seq2seq model and tokenizer once at import time,
then exposes ``translate`` for single-sentence translation.
"""
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hub repository hosting the fine-tuned weights and tokenizer files.
model_name = "hima06varshini/english-to-telugu-translation"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def translate(text):
    """Translate an English string into Telugu.

    Args:
        text: Source sentence in English.

    Returns:
        The decoded Telugu translation, with special tokens stripped.
    """
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Guard the demo so importing this module (e.g. from another script or a
# serving wrapper) does not trigger an example translation as a side effect.
if __name__ == "__main__":
    text = "Hello, how are you?"
    translation = translate(text)
    print("Translated text:", translation)
model_card.yaml.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "english-to-telugu-translation"
2
+ tags:
3
+ - translation
4
+ - machine-translation
5
+ - english
6
+ - telugu
7
+ library_name: "transformers"
8
+ license: "MIT"
9
+ language:
10
+ - en
11
+ - te
12
+ metrics:
13
+ - "BLEU"
14
+ - "ROUGE"
15
+ - "METEOR"
16
+ model_description: |
17
+ This model is trained for English-to-Telugu translation using a Transformer-based architecture.
18
+ It is fine-tuned on a large parallel dataset of English and Telugu sentences.
19
+
20
+ source: "Trained on the English-Telugu Parallel Corpus dataset."
21
+ citation: |
22
+ If you use this model, please cite:
23
+ "English-to-Telugu Translation Model, created by Y. Himavarshini, 2025"
24
+
25
+ model_creators:
26
+ - "Y. Himavarshini"
27
+ - "Gates Institute of Technology"
28
+
29
+ task_categories:
30
+ - "Machine Translation"
31
+
32
+ task_ids:
33
+ - "translation"
34
+
35
+ pretty_name: "English-to-Telugu Translation Model"
36
+
37
+ model_config:
38
+ architecture: "Transformer"
39
+ num_layers: 6
40
+ hidden_size: 512
41
+ vocab_size: 32000
42
+
43
+ datasets:
44
+ - "hima06varshini/english-telugu-parallel-corpus"
45
+
46
+ inference_example:
47
+ - input: "Hello, how are you?"
48
+ output: "హలో, మీరు ఎలా ఉన్నారు?"
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734b5556bf566636ee967fca37c75cc40dc3c6ea048cc6ea89e0f496f07fcbad
3
+ size 109
tokenizer.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
"""Download and snapshot the tokenizer files for this repository.

NOTE(review): this file is named ``tokenizer.json`` but contains a Python
helper script, not tokenizer data — consider renaming it.
"""
from transformers import AutoTokenizer

# Was the unresolved placeholder "your-model-name", which does not exist on
# the Hub; use the actual repository id referenced everywhere else here.
tokenizer = AutoTokenizer.from_pretrained("hima06varshini/english-to-telugu-translation")

# Write tokenizer.json / vocab files into a local directory for upload.
tokenizer.save_pretrained("model_directory")
vocab.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
"""Dump every token in the tokenizer's vocabulary to vocab.txt, one per line.

NOTE(review): this file is named ``vocab.txt`` but contains a Python helper
script, not vocabulary data — consider renaming it.
"""
from transformers import AutoTokenizer

# The original snippet referenced an undefined `tokenizer` (NameError when
# run standalone); load it explicitly from this repository.
tokenizer = AutoTokenizer.from_pretrained("hima06varshini/english-to-telugu-translation")

# Explicit UTF-8: Telugu tokens are not representable in the platform
# default encoding on some systems (e.g. cp1252 on Windows).
with open("vocab.txt", "w", encoding="utf-8") as f:
    for token in tokenizer.get_vocab().keys():
        f.write(token + "\n")