---
# Training recipe configuration for the model/dataset named below.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML. The nesting below was reconstructed from key order
# and section names; verify it against the consuming recipe's config schema.

model:
  name: "p017_omni_ctc_300m_v2_fleurs_fa_ir_final"

dataset:
  name: "thomcles_persian_farsi_speech"
  train_split: "train"
  valid_split: "dev"
  storage_mode: "MIXTURE_PARQUET"
  task_mode: "ASR"
  mixture_parquet_storage_config:
    # NOTE(review): absolute path under one user's home directory; this
    # config will only resolve on a machine with the same layout.
    dataset_summary_path: "/home/simon/github/peacock-asr/projects/P017-Persian-Evals/data/thomcles_persian_omni/language_distribution_0.tsv"
    beta_corpus: 0.5
    beta_language: 0.5
    fragment_loading:
      cache: true
  asr_task_config:
    # NOTE(review): underscore digit separators are kept as written; confirm
    # the loader in use parses them as integers rather than strings.
    min_audio_len: 16_000
    max_audio_len: 960_000
    max_num_elements: 960_000
    batch_shuffle_window: 1
    normalize_audio: true
    example_shuffle_window: 1

tokenizer:
  name: "omniASR_tokenizer_written_v2"

optimizer:
  config:
    # NOTE(review): some YAML 1.1 loaders read an exponent without a decimal
    # point as a string; confirm the consumer receives a float here.
    lr: 1e-05

trainer:
  freeze_encoder_for_n_steps: 0
  mixed_precision:
    dtype: "torch.bfloat16"
  grad_accumulation:
    num_batches: 8
  activation_checkpointing:
    mode: "layerwise"
    every_nth_layer: 1
  gc_every_n_steps: 100

regime:
  num_steps: 5_000
  validate_every_n_steps: 500
  validate_after_n_steps: 499
  checkpoint_every_n_steps: 500
  checkpoint_after_n_steps: 499
  save_model_only: "all_but_last"
  keep_last_n_checkpoints: 2
  publish_metrics_every_n_steps: 1
  publish_metrics_after_n_steps: 0