{
  "vocab_size": 50257,
  "n_layers": 8,
  "n_heads": 8,
  "n_kv_heads": 4,
  "embed_dim": 512,
  "ff_dim": 1792,
  "max_seq_len": 512,
  "dropout": 0.1,
  "activation": "swiglu",
  "attention_dropout": 0.1,
  "pos_encoding": "rotary",
  "tie_word_embeddings": true
}
{
  "vocab_size": 50257,
  "n_layers": 8,
  "n_heads": 8,
  "n_kv_heads": 4,
  "embed_dim": 512,
  "ff_dim": 1792,
  "max_seq_len": 512,
  "dropout": 0.1,
  "activation": "swiglu",
  "attention_dropout": 0.1,
  "pos_encoding": "rotary",
  "tie_word_embeddings": true
}