@@ -10,69 +10,40 @@ import Foundation
1010import MLX
1111import MLXLMCommon
1212import MLXNN
import ReerCodable

/// Configuration for the BailingMoe model, mirroring the snake_case keys of the
/// upstream `config.json`. Decoding/encoding is generated by the ReerCodable
/// `@Codable` macro; each property is mapped to its JSON key via `@CodingKey`,
/// so no hand-written `CodingKeys` enum is needed.
@Codable
public struct BailingMoeConfiguration: Sendable {
    // Required keys — decoding fails if any of these is missing
    // (except `maxPositionEmbeddings`, which is optional).
    @CodingKey("model_type") public var modelType: String
    @CodingKey("hidden_size") public var hiddenSize: Int
    @CodingKey("intermediate_size") public var intermediateSize: Int
    @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int?
    @CodingKey("moe_intermediate_size") public var moeIntermediateSize: Int
    @CodingKey("num_experts") public var numExperts: Int
    @CodingKey("num_shared_experts") public var numSharedExperts: Int
    @CodingKey("norm_topk_prob") public var normTopkProb: Bool
    @CodingKey("num_attention_heads") public var attentionHeads: Int
    @CodingKey("num_experts_per_tok") public var numExpertsPerToken: Int
    @CodingKey("num_hidden_layers") public var hiddenLayers: Int
    @CodingKey("num_key_value_heads") public var kvHeads: Int
    @CodingKey("rms_norm_eps") public var rmsNormEps: Float
    @CodingKey("rope_theta") public var ropeTheta: Float
    @CodingKey("vocab_size") public var vocabularySize: Int
    @CodingKey("first_k_dense_replace") public var firstKDenseReplace: Int

    // Optional features — each has a default so configs that omit the key
    // still decode successfully.
    @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil
    @CodingKey("use_bias") public var useBias: Bool = false
    @CodingKey("use_qkv_bias") public var useQKVBias: Bool = false
    @CodingKey("use_qk_norm") public var useQKNorm: Bool = false
    @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false
    @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float = 1.0
    @CodingKey("moe_router_enable_expert_bias") public var moeRouterEnableExpertBias: Bool = false
    @CodingKey("routed_scaling_factor") public var routedScalingFactor: Float = 1.0
    @CodingKey("score_function") public var scoreFunction: String = "softmax"
    @CodingKey("n_group") public var nGroup: Int = 1
    @CodingKey("topk_group") public var topkGroup: Int = 4
    @CodingKey("moe_shared_expert_intermediate_size") public var moeSharedExpertIntermediateSize: Int? = nil
}
7748
7849private class Attention : Module {
0 commit comments