# Provenance (captured from the hosting page header, kept for reference):
#   uploader: chikingsley
#   commit:   556cb41 (verified) - "Add files using upload-large-folder tool"
# Training recipe configuration: model, dataset, tokenizer, optimizer,
# trainer, and regime sections.
#
# NOTE(review): the nesting below was reconstructed from a copy in which all
# indentation had been lost (every key sat at column 0, which left the section
# keys empty and repeated `name` three times at one level). The hierarchy is
# inferred from key names and section order; confirm it against the schema of
# the recipe that consumes this file.

model:
  name: "p017_omni_ctc_300m_v2_fleurs_fa_ir_final"

dataset:
  name: "thomcles_persian_farsi_speech"
  train_split: "train"
  valid_split: "dev"
  storage_mode: "MIXTURE_PARQUET"
  task_mode: "ASR"
  mixture_parquet_storage_config:
    # NOTE(review): absolute, machine-specific path; it will not resolve on
    # another host without being overridden.
    dataset_summary_path: "/home/simon/github/peacock-asr/projects/P017-Persian-Evals/data/thomcles_persian_omni/language_distribution_0.tsv"
    beta_corpus: 0.5
    beta_language: 0.5
    fragment_loading:
      cache: true
  asr_task_config:
    # Integers are written as plain digits (no `_` separators) so that both
    # YAML 1.1 and YAML 1.2 parsers resolve them as integers.
    # NOTE(review): lengths are presumably counted in audio samples - confirm.
    min_audio_len: 16000
    max_audio_len: 960000
    max_num_elements: 960000
    batch_shuffle_window: 1
    normalize_audio: true
    example_shuffle_window: 1

tokenizer:
  name: "omniASR_tokenizer_written_v2"

optimizer:
  config:
    # Written with an explicit decimal point: a bare `1e-05` is resolved as a
    # string by YAML 1.1 parsers, while `1.0e-05` is a float everywhere.
    lr: 1.0e-05

trainer:
  freeze_encoder_for_n_steps: 0
  mixed_precision:
    dtype: "torch.bfloat16"
  grad_accumulation:
    num_batches: 8
  activation_checkpointing:
    mode: "layerwise"
    every_nth_layer: 1
  # NOTE(review): placed directly under `trainer` (not under
  # `activation_checkpointing`) by inference - confirm.
  gc_every_n_steps: 100

regime:
  num_steps: 5000
  validate_every_n_steps: 500
  validate_after_n_steps: 499
  checkpoint_every_n_steps: 500
  checkpoint_after_n_steps: 499
  save_model_only: "all_but_last"
  keep_last_n_checkpoints: 2
  publish_metrics_every_n_steps: 1
  publish_metrics_after_n_steps: 0