koutch/paper_smol_3.json_train_dpo_v1_train_no_think Text Generation • 3B • Updated about 5 hours ago • 4
koutch/paper_qwen_3.json_train_dpo_v1_train_no_think Text Generation • 4B • Updated about 6 hours ago • 14
koutch/paper_smol_smol3-3B_train_sft_train_think Text Generation • 3B • Updated about 6 hours ago • 32
koutch/paper_qwen_qwen3-instruct-4b_train_sft_train_think Text Generation • 4B • Updated about 8 hours ago • 9
koutch/paper_smol_smol3-3B_train_sft_train_no_think Text Generation • 3B • Updated about 9 hours ago • 10
koutch/paper_llama_llama3.1-8b_train_sft_train_think Text Generation • 8B • Updated about 9 hours ago • 15
koutch/paper_llama_llama3.1-8b_train_sft_train_no_think Text Generation • 8B • Updated about 9 hours ago • 19
koutch/paper_llama_llama3.1-8b_train_sft_train_para Text Generation • 8B • Updated about 9 hours ago • 13
koutch/paper_smol_smol3-3B_train_sft_train_para Text Generation • 3B • Updated about 9 hours ago • 12
koutch/paper_smol_smol3-3B_train_sft_all_train_think Text Generation • 3B • Updated about 9 hours ago • 13
koutch/paper_qwen_qwen3-instruct-4b_train_sft_train_no_think Text Generation • 4B • Updated about 9 hours ago • 16
koutch/paper_qwen_qwen3-instruct-4b_train_sft_train_para Text Generation • 4B • Updated about 9 hours ago • 19
koutch/paper_qwen_qwen3-instruct-4b_train_sft_all_train_think Text Generation • 4B • Updated about 11 hours ago • 15
koutch/paper_llama_llama3.1-8b_train_sft_all_train_think Text Generation • 8B • Updated 1 day ago • 20
koutch/short_paper_llama_2.json_train_dpo_v1_train_no_think Text Generation • 8B • Updated 3 days ago • 52
koutch/short_paper_llama_2.json_train_dpo_v2_train_no_think Text Generation • 8B • Updated 3 days ago • 41
koutch/short_paper_qwen_2.json_train_dpo_v2_train_no_think Text Generation • 4B • Updated 3 days ago • 48
koutch/short_paper_qwen_2.json_train_dpo_v1_train_no_think Text Generation • 4B • Updated 3 days ago • 40
koutch/short_paper_llama_llama3.1-8b_train_sft_all_train_no_think Text Generation • 8B • Updated 3 days ago • 150
koutch/short_paper_llama_llama3.1-8b_train_sft_train_no_think Text Generation • 8B • Updated 3 days ago • 306
koutch/short_paper_qwen_qwen3-instruct-4b_train_sft_all_train_no_think Text Generation • 4B • Updated 3 days ago • 129
koutch/short_paper_llama_llama3.1-8b_train_sft_train_para Text Generation • 8B • Updated 3 days ago • 191
koutch/short_paper_smol_2.json_train_dpo_v2_train_no_think Text Generation • 3B • Updated 3 days ago • 45
koutch/short_paper_smol_2.json_train_dpo_v1_train_no_think Text Generation • 3B • Updated 3 days ago • 49
koutch/short_paper_smol_smol3-3B_train_sft_train_no_think Text Generation • 3B • Updated 4 days ago • 313
koutch/short_paper_qwen_qwen3-instruct-4b_train_sft_train_para Text Generation • 4B • Updated 4 days ago • 181
koutch/short_paper_smol_smol3-3B_train_sft_train_para Text Generation • 3B • Updated 4 days ago • 188
koutch/short_paper_qwen_qwen3-instruct-4b_train_sft_train_no_think Text Generation • 4B • Updated 4 days ago • 287
koutch/short_paper_smol_smol3-3B_train_sft_all_train_no_think Text Generation • 3B • Updated 4 days ago • 150
koutch/short_paper_llama_1.json_train_dpo_v3_train_no_think Text Generation • 8B • Updated 5 days ago • 52