#!/usr/bin/env python3
"""
create_test_embedding_lora.py
Create a test LoRA adapter containing specified modules
Based on correct dimension specifications from SGLang layers.py
"""
import json
import os
import torch

def create_test_embedding_lora(
    output_dir="./test_embedding_lora",
    base_model="meta-llama/Llama-2-7b-hf",
    lora_rank=8,
    lora_alpha=16,
    target_modules=None,
    added_tokens=None,
):
    """
    Create a test LoRA adapter for the specified target modules.
    
    Args:
        output_dir: Output directory
        base_model: Base model name
        lora_rank: LoRA rank
        lora_alpha: LoRA alpha
        target_modules: List of target modules to generate LoRA weights for; defaults to all supported modules listed below
        added_tokens: Content of added_tokens.json (dictionary), defaults to empty
    
    Supported target_modules:
        - embed_tokens: Word embedding layer
        - lm_head: Language model head
        - q_proj, k_proj, v_proj, o_proj: Attention layers
        - gate_proj, up_proj, down_proj: FFN layers
    """
    
    # Default: generate LoRA weights for every supported module
    if target_modules is None:
        target_modules = ["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    
    # Llama-2-7b configuration
    vocab_size = 32000
    embedding_dim = 4096
    hidden_dim = 4096
    intermediate_size = 11008  # FFN intermediate dimension
    
    print(f"Creating test LoRA adapter in {output_dir}")
    print(f"  vocab_size: {vocab_size}")
    print(f"  embedding_dim: {embedding_dim}")
    print(f"  hidden_dim: {hidden_dim}")
    print(f"  intermediate_size: {intermediate_size}")
    print(f"  lora_rank: {lora_rank}")
    print(f"  lora_alpha: {lora_alpha}")
    print(f"  target_modules: {target_modules}")
    print()
    
    os.makedirs(output_dir, exist_ok=True)
    
    # Define weight shapes for each module
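    # LoRA convention: for a base weight W of shape (out_features, in_features),
    # lora_A has shape (r, in_features) and lora_B has shape (out_features, r),
    # so delta_W = lora_B @ lora_A matches W and is scaled by lora_alpha / r.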
    module_shapes = {
        # Embedding layer: vocab_size -> embedding_dim
        "embed_tokens": {
            "lora_A": (lora_rank, vocab_size),
            "lora_B": (embedding_dim, lora_rank),
        },
        # LM head: hidden_dim -> vocab_size
        "lm_head": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (vocab_size, lora_rank),
        },
        # Attention layers: hidden_dim -> hidden_dim
        "q_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (hidden_dim, lora_rank),
        },
        "k_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (hidden_dim, lora_rank),
        },
        "v_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (hidden_dim, lora_rank),
        },
        "o_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (hidden_dim, lora_rank),
        },
        # FFN layers
        "gate_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (intermediate_size, lora_rank),
        },
        "up_proj": {
            "lora_A": (lora_rank, hidden_dim),
            "lora_B": (intermediate_size, lora_rank),
        },
        "down_proj": {
            "lora_A": (lora_rank, intermediate_size),
            "lora_B": (hidden_dim, lora_rank),
        },
    }
    
    # Create LoRA weights
    print("Creating LoRA weights with shapes:")
    lora_weights = {}
    
    for module in target_modules:
        if module not in module_shapes:
            print(f"⚠️  Warning: Unknown module '{module}', skipping...")
            continue
            
        shapes = module_shapes[module]
        
        # Decide the weight-name prefix based on module type
        if module == "embed_tokens":
            prefix = "base_model.model.model.embed_tokens"
        elif module == "lm_head":
            prefix = "base_model.model.lm_head"
        elif module in ["q_proj", "k_proj", "v_proj", "o_proj"]:
            # Attention projections exist in every layer; only layer 0 is created here as an example
            prefix = f"base_model.model.model.layers.0.self_attn.{module}"
        else:
            # FFN projections exist in every layer; only layer 0 is created here as an example
            prefix = f"base_model.model.model.layers.0.mlp.{module}"
        
        lora_A_shape = shapes["lora_A"]
        lora_B_shape = shapes["lora_B"]
        
        print(f"  {module}.lora_A: {lora_A_shape}")
        print(f"  {module}.lora_B: {lora_B_shape}")
        
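        # Embedding LoRA factors are stored as lora_embedding_A / lora_embedding_B
        # (plain parameters, no ".weight" suffix), while linear modules use
        # lora_A.weight / lora_B.weight, matching PEFT-style checkpoint key names.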
        if module == "embed_tokens":
            lora_weights[f"{prefix}.lora_embedding_A"] = torch.randn(*lora_A_shape) * 0.01
            lora_weights[f"{prefix}.lora_embedding_B"] = torch.randn(*lora_B_shape) * 0.01
        else:
            lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(*lora_A_shape) * 0.01
            lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(*lora_B_shape) * 0.01
        
    print()
    
    # Verify created weight shapes
    print("Verifying created weight shapes:")
    for name, weight in lora_weights.items():
        print(f"  {name}: {weight.shape}")
    print()
    
    # Save as safetensors format
    try:
        from safetensors.torch import save_file
        save_file(lora_weights, os.path.join(output_dir, "adapter_model.safetensors"))
        print("✅ Saved adapter_model.safetensors")
    except ImportError:
        # If safetensors is not available, use pytorch format
        torch.save(lora_weights, os.path.join(output_dir, "adapter_model.bin"))
        print("✅ Saved adapter_model.bin (safetensors not available)")
    
    # Create adapter_config.json
    adapter_config = {
        "auto_mapping": None,
        "base_model_name_or_path": base_model,
        "bias": "none",
        "fan_in_fan_out": False,
        "inference_mode": True,
        "init_lora_weights": True,
        "layers_pattern": None,
        "layers_to_transform": None,
        "lora_alpha": lora_alpha,
        "lora_dropout": 0.0,
        "modules_to_save": None,
        "peft_type": "LORA",
        "r": lora_rank,
        "revision": None,
        "target_modules": target_modules,
        "task_type": "CAUSAL_LM"
    }
    
    with open(os.path.join(output_dir, "adapter_config.json"), "w") as f:
        json.dump(adapter_config, f, indent=2)
    print("✅ Saved adapter_config.json")
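    # Note: the finished adapter can be loaded with PEFT (a hedged sketch, assuming the
    # peft package is installed and a Llama-2-7B model object `base_llama_model` exists):
    #   from peft import PeftModel
    #   model = PeftModel.from_pretrained(base_llama_model, output_dir)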
    
    # Create added_tokens.json
    if added_tokens is None:
        added_tokens = {}
    
    with open(os.path.join(output_dir, "added_tokens.json"), "w") as f:
        json.dump(added_tokens, f, indent=2)
    print("✅ Saved added_tokens.json")
    

    # Create config.json (base model config)
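    # The values below mirror the published Llama-2-7B configuration (32 hidden layers,
    # 32 attention heads, and 32 KV heads, since the 7B model uses multi-head attention).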
    model_config = {
        "architectures": ["LlamaForCausalLM"],
        "model_type": "llama",
        "vocab_size": vocab_size,
        "hidden_size": hidden_dim,
        "intermediate_size": intermediate_size,
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "max_position_embeddings": 4096,
        "rms_norm_eps": 1e-05,
        "rope_theta": 10000.0,
        "torch_dtype": "float16",
        "transformers_version": "4.36.0"
    }

    with open(os.path.join(output_dir, "config.json"), "w") as f:
        json.dump(model_config, f, indent=2)
    print("✅ Saved config.json")
    
    # Copy tokenizer files from the base model so HuggingFace-side embed_tokens tests can load the adapter
    try:
        from transformers import AutoTokenizer
        print(f"Copying tokenizer files from {base_model}...")
        
        base_tokenizer = AutoTokenizer.from_pretrained(base_model)
        base_tokenizer.save_pretrained(output_dir)
        print("✅ Saved tokenizer files (tokenizer_config.json, tokenizer.json, etc.)")
    except Exception as e:
        print(f"⚠️  Warning: Could not copy tokenizer files: {e}")
        print("    HuggingFace tests with embed_tokens may fail.")
     
    # Create README
    readme = f"""# Test LoRA Adapter

This is a test LoRA adapter with customizable target modules.

## Configuration
- Base model: {base_model}
- LoRA rank (r): {lora_rank}
- LoRA alpha: {lora_alpha}
- Target modules: {', '.join(target_modules)}

## Weight Shapes
"""
    
    for module in target_modules:
        if module in module_shapes:
            shapes = module_shapes[module]
            readme += f"- {module}.lora_A: {shapes['lora_A']}\n"
            readme += f"- {module}.lora_B: {shapes['lora_B']}\n"
    
    readme += f"""
## Usage with SGLang

python hf_sgl_difference.py \\
    --model-path {base_model} \\
    --lora-paths {output_dir} \\
    --attention-backend triton \\
    --lora-backend triton \\
    --port 30000 \\
    --disable-cuda-graph \\
    --output-dir ./logprob_results

## Note

This adapter contains randomly initialized weights for testing purposes only.
"""
    
    with open(os.path.join(output_dir, "README.md"), "w") as f:
        f.write(readme)
    print("✅ Saved README.md")
    
    print("\n🎉 Test LoRA adapter created successfully!")
    print(f"\n📁 Output directory: {output_dir}")

if __name__ == "__main__":
    import argparse
    
    parser = argparse.ArgumentParser(
        description="Create test LoRA adapter with customizable target modules",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Default: generate all supported modules
  python create_test_embedding_lora.py
  
  # Generate only attention layers
  python create_test_embedding_lora.py --target-modules q_proj k_proj v_proj o_proj
  
  # Generate all supported layers
  python create_test_embedding_lora.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj
  
  # Specify custom parameters
  python create_test_embedding_lora.py \\
      --output-dir ./my_lora \\
      --base-model meta-llama/Llama-2-7b-hf \\
      --lora-rank 16 \\
      --lora-alpha 32 \\
      --target-modules q_proj k_proj v_proj
  
  # Specify added_tokens
  python create_test_embedding_lora.py --added-tokens '{"<special>": 32000}'
        """
    )
    
    parser.add_argument("--output-dir", type=str, default="./test_embedding_lora",
                        help="Output directory for the adapter")
    parser.add_argument("--base-model", type=str, default="meta-llama/Llama-2-7b-hf",
                        help="Base model name or path")
    parser.add_argument("--lora-rank", type=int, default=8,
                        help="LoRA rank (r)")
    parser.add_argument("--lora-alpha", type=int, default=16,
                        help="LoRA alpha (scaling factor)")
    parser.add_argument("--target-modules", type=str, nargs="+", 
                        default=["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
                        help="Target modules for LoRA. Supported: embed_tokens, lm_head, "
                             "q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj")
    parser.add_argument("--added-tokens", type=str, default=None,
                        help="JSON string for added_tokens.json (e.g., '{\"<special>\": 32000}'). "
                             "Default is empty dict")
    
    args = parser.parse_args()
    
    # Parse added_tokens JSON
    added_tokens_dict = None
    if args.added_tokens:
        try:
            added_tokens_dict = json.loads(args.added_tokens)
        except json.JSONDecodeError as e:
            print(f"❌ Error parsing added_tokens JSON: {e}")
            exit(1)
    
    create_test_embedding_lora(
        output_dir=args.output_dir,
        base_model=args.base_model,
        lora_rank=args.lora_rank,
        lora_alpha=args.lora_alpha,
        target_modules=args.target_modules,
        added_tokens=added_tokens_dict,
    )
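
    # Optional sanity check (a minimal sketch, assuming the adapter was written in
    # safetensors format): reload the file and print each tensor's shape so it can
    # be compared against the shapes reported during creation.
    adapter_path = os.path.join(args.output_dir, "adapter_model.safetensors")
    if os.path.exists(adapter_path):
        try:
            from safetensors.torch import load_file
            reloaded = load_file(adapter_path)
            print("\nReloaded adapter_model.safetensors for verification:")
            for name, tensor in reloaded.items():
                print(f"  {name}: {tuple(tensor.shape)}")
        except ImportError:
            pass  # safetensors unavailable; the adapter was saved as adapter_model.bin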

    
# # Default: generate all supported modules
# python create_test_embedding_lora.py

# # Generate only attention layers
# python create_test_embedding_lora.py --target-modules q_proj k_proj v_proj o_proj

# # Generate all layers
# python create_test_embedding_lora.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj

# # Full customization
# python create_test_embedding_lora.py \
#     --output-dir ./my_custom_lora \
#     --base-model meta-llama/Llama-2-7b-hf \
#     --lora-rank 16 \
#     --lora-alpha 32 \
#     --target-modules q_proj k_proj v_proj \
#     --added-tokens '{"<|im_start|>": 32000, "<|im_end|>": 32001}'