chincyk
/

PyCodeGen

@@ -1,12 +1,14 @@
----
-library_name: transformers
-tags:
-- code
-license: mit
-datasets:
-- iamtarun/python_code_instructions_18k_alpaca
-pipeline_tag: text-generation
----
 # PyCodeGen 350M
@@ -32,6 +34,10 @@ Finally model has been adapted to the Python language by training on the BigPyth
 The dataset contains problem descriptions and code in python language.
 This dataset is taken from sahil2801/code_instructions_120k, which adds a prompt column in alpaca style.
 ## Example of usage
@@ -68,25 +74,25 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
 ## Training parameters
 BitsAndBytes:
-- load_in_4bit=True,
-- bnb_4bit_quant_type="nf4",
-- bnb_4bit_use_double_quant=True,
-- bnb_4bit_compute_dtype=torch.bfloat16
 LoraConfig:
-- r=32,
-- lora_alpha=16,
-- target_modules='all-linear',
-- lora_dropout=0.1,
-- bias='none',
-- task_type='CASUAL_LM'
 Finetuning:
-- num_epochs = 15
-- train_batch_size = 4
-- eval_batch_size = 8
-- gradient_accumulation_steps = 8
-- learning_rate = 3e-4
-- weight_decay = 0.01
-- lr_scheduler_name = "cosine"
-- num_warmup_steps = 190

+---
+library_name: transformers
+tags:
+- code
+license: mit
+datasets:
+- iamtarun/python_code_instructions_18k_alpaca
+pipeline_tag: text-generation
+language:
+- en
+---
 # PyCodeGen 350M
 The dataset contains problem descriptions and code in python language.
 This dataset is taken from sahil2801/code_instructions_120k, which adds a prompt column in alpaca style.
+## Intended uses
+The model can be used to generate Python code that solves a given task, optionally using provided input data.
 ## Example of usage
 ## Training parameters
 BitsAndBytes:
+- load_in_4bit: True
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: torch.bfloat16
 LoraConfig:
+- r: 32
+- lora_alpha: 16
+- target_modules: all-linear
+- lora_dropout: 0.1
+- bias: none
+- task_type: CAUSAL_LM
 Finetuning:
+- num_epochs: 15
+- train_batch_size: 4
+- eval_batch_size: 8
+- gradient_accumulation_steps: 8
+- learning_rate: 3e-4
+- weight_decay: 0.01
+- lr_scheduler_name: cosine
+- num_warmup_steps: 190