dacorvo (HF Staff) committed
Commit 5dc3044 · verified · 1 Parent(s): f1460ba

Synchronizing local compiler cache.
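The commit pushes artifacts from a local Neuron compiler cache (NEFF binaries, HLO modules, compile flags and registry entries) up to this Hub repository. As a rough, hedged sketch of what such a synchronization amounts to (the repo id and local cache path below are assumptions for illustration; the actual sync is normally driven by optimum-neuron's cache tooling rather than a manual upload):

```python
# Hedged sketch: upload a local Neuron compiler cache directory to a Hub repo.
# repo_id and folder_path are assumptions, not values taken from this commit.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    repo_id="aws-neuron/optimum-neuron-cache",    # assumed target cache repo
    repo_type="model",
    folder_path="/var/tmp/neuron-compile-cache",  # assumed local cache root
    commit_message="Synchronizing local compiler cache.",
)
```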

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set of changes.
Files changed (50)
  1. .gitattributes +29 -0
  2. neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json +1 -0
  3. neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done +0 -0
  4. neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb +3 -0
  5. neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff +3 -0
  6. neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo +3 -0
  7. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json +1 -0
  8. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done +0 -0
  9. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb +3 -0
  10. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff +3 -0
  11. neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo +3 -0
  12. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json +1 -0
  13. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done +0 -0
  14. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb +3 -0
  15. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff +3 -0
  16. neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo +3 -0
  17. neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json +1 -0
  18. neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done +0 -0
  19. neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb +3 -0
  20. neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff +3 -0
  21. neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json +1 -0
  22. neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done +0 -0
  23. neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb +3 -0
  24. neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff +3 -0
  25. neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json +1 -0
  26. neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done +0 -0
  27. neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb +3 -0
  28. neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff +3 -0
  29. neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json +1 -0
  30. neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done +0 -0
  31. neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb +3 -0
  32. neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff +3 -0
  33. neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo +3 -0
  34. neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json +1 -0
  35. neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done +0 -0
  36. neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb +3 -0
  37. neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff +3 -0
  38. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json +63 -0
  39. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json +63 -0
  40. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json +63 -0
  41. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json +63 -0
  42. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json +64 -0
  43. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json +63 -0
  44. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json +64 -0
  45. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json +64 -0
  46. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json +64 -0
  47. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json +64 -0
  48. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json +64 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json +1 -0
  50. neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done +0 -0
.gitattributes CHANGED
@@ -5708,3 +5708,32 @@ neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/model.neff
  neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.33363.0+82129205/MODULE_77f1b8fbe51833738f1c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.33363.0+82129205/MODULE_7b28cd1e3dc6bc844fa3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
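Each MODULE_* directory stores a compile_flags.json with the exact neuronx-cc arguments used to build that module's NEFF; the entry above corresponds to a token-generation graph compiled at -O2 with the internal NEFF wrapper enabled. A minimal sketch, assuming a hypothetical local checkout of this repository, of how those flags can be inspected:

```python
# Minimal sketch: load the stored compiler flags for one cached module.
# The module path is hypothetical and only used for illustration.
import json
from pathlib import Path

module_dir = Path("neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939")
flags = json.loads((module_dir / "compile_flags.json").read_text())

print(len(flags), "flags")
print(" ".join(flags))  # e.g. --target=trn1 --auto-cast=none ... -O2 ...
```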
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eaac8a0d1057bbcaaf5fc3dc85fa7699af59ddc07c1c26435640d6112acceff2
+ size 378947
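model.hlo_module.pb, model.neff and wrapped_neff.hlo are tracked through Git LFS, so the repository only stores three-line pointer files (spec version, sha256 oid, byte size) like the one above; the actual binaries live in LFS storage. A small sketch, assuming a hypothetical local pointer file, of reading those fields:

```python
# Sketch: parse a Git LFS pointer file into its version/oid/size fields.
# The path below is hypothetical and only used for illustration.
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

info = read_lfs_pointer("MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb")
print(info["oid"], int(info["size"]))  # e.g. sha256:eaac8a0d... 378947
```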
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f24564b3a426253a6c9a58a73a9dedf7df1bb3c4ae4f9c3029d426f107c9c7b4
+ size 1926144
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7485e701ed5c6302ee1967ad86e7d57db68d08f66dca5344c6ee9783377ef51
+ size 2000256
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c69e83428c93f3412168937e1df467dd69d10ef46e9576f1fcb96de906e1a86
+ size 382812
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76209c24a04f98926f0cfe3fb63de3d27ef082dcdb30d4fec9ee2de7c5f18d37
+ size 3032064
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca1eb5edafa963dab3105d66b6044e615664da9c73f0a1a998f7ea8ae9ba07b7
+ size 3106169
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d430b5a352b7fbee3742c299062adfd10f1232b12cd924c52e979764a23e3f8
+ size 382348
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e24de20088f1002f739a76dff410698f254aab56f4dbdd8cdd5e85d2770eaa7c
+ size 2049024
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f5ca74126a5794dacf54743b1bbfd785f5eb4f45e34cb5f0280e864eeb78a0a
+ size 2123112
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e997c4e79e96cbcb32a125f0d039e049ef9db52977baa2ba2ff1c61cc44f8728
+ size 472299
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f6f412e048ddcb9a23fa9f92216ff3eeeb3eeeb78a0c4c5aada3c1962097b44
+ size 8203264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a62f6d9bc9df3e2de19505d67e63c54515ab460fea5371f2c82e9e8d3c67322b
+ size 467299
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b4934ab602c98cf48c3c0047ec8bdd1706da17f5f5e7e9542e04045911cfeb3
+ size 4363264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb0b4a58bc33a450116485bdc97c80db38b76b6cf227f1a396467a16f8a24120
+ size 463666
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed1635305f59594bb96729057bfbd84fefd31db1795f3e1248314ef8e7b6e273
+ size 3953664
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b6e644ce4a647575cac6ee8a2b724e7eeddce2a15d81873dcb06658ee9b43c2
+ size 374121
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:27888041c8321fa7821e9e2462fa24906b92a55403e477ad4270efcf29f80abf
+ size 1117184
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79f4497b6c14d756912c64bdf11a914fbeeea99ea086717893013947ddb15959
+ size 1209802
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a4ab156514485dbf5d74db2535e84136a07f17752c578320af5984844defbff
+ size 468446
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa6791c3341dd45d1cdf2724e98350fb7b1b08c8198db84fcd650b33c04117b0
+ size 22252544
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
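The 0_REGISTRY entries are small JSON descriptors, one per cached compilation, that pair the original model configuration with the NxDNeuronConfig it was compiled for (batch size, sequence length, tp_degree, on-device sampling, ...). A hedged sketch, assuming a local checkout of this repository, of scanning them to find the configurations available for a given tensor-parallel degree:

```python
# Sketch: list registry entries for Llama-3.1-8B-Instruct compiled with tp_degree=2.
# The registry path assumes a local checkout of this cache repository.
import json
from pathlib import Path

registry = Path("neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3"
                "/llama/meta-llama/Llama-3.1-8B-Instruct")

for entry_file in sorted(registry.glob("*.json")):
    neuron = json.loads(entry_file.read_text())["neuron"]
    if neuron["tp_degree"] == 2:
        print(entry_file.name, neuron["sequence_length"], neuron["on_device_sampling"])
```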
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "sequence_parallel_enabled": false,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
+ "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": false,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "sequence_parallel_enabled": false,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": false,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": false,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "dtype": "bfloat16",
+ "head_dim": 64,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.33363.0+82129205",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev3",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "sequence_parallel_enabled": true,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "unsloth_fixed": true,
+ "use_cache": true,
+ "vocab_size": 128256
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done ADDED
File without changes