# Training run configuration: one flat mapping of scalar options.
# Keys are grouped by concern below; every key and value is unchanged.

# Model
model_name_or_path: meta-llama/Llama-3.1-70B

# Training mode
# NOTE(review): the semantics of `dynamic_select` and the schema of the
# components file are defined by the consuming trainer, not by this file;
# confirm against its documentation.
train_type: dynamic_select
components_cfg_file: src/dataflex/configs/components.yaml

# Distributed runtime and data preprocessing
deepspeed: examples/deepspeed/ds_z3_config.json
preprocessing_num_workers: 16

# Batch size and learning-rate schedule
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
lr_scheduler_type: cosine

# Output
# NOTE(review): this path starts with `../` while the other paths in this
# file do not; presumably all are resolved against the launch directory.
# Confirm before running from a different working directory.
output_dir: ../dataflex_saves/Llama-3.1-70B/less_setting2
overwrite_output_dir: true