Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +29 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json +1 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done +0 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb +3 -0
- neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json +64 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done +0 -0
.gitattributes
CHANGED
|
@@ -5708,3 +5708,32 @@ neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/model.neff
|
|
| 5708 |
neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5709 |
neuronxcc-2.21.33363.0+82129205/MODULE_77f1b8fbe51833738f1c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5710 |
neuronxcc-2.21.33363.0+82129205/MODULE_7b28cd1e3dc6bc844fa3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5708 |
neuronxcc-2.21.33363.0+82129205/MODULE_ccc2d4abbb67d5b7286f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5709 |
neuronxcc-2.21.33363.0+82129205/MODULE_77f1b8fbe51833738f1c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5710 |
neuronxcc-2.21.33363.0+82129205/MODULE_7b28cd1e3dc6bc844fa3+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5711 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5712 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5713 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5714 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5715 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5716 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5717 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5718 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5719 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5720 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5721 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5722 |
+
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5723 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5724 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_1351bde3344f209ba8a1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5725 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_20ded251b25e18aad5b0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5726 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_35a63695aeefa2d44798+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5727 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_69ce67073ba646143ed4+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5728 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5729 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_7c2638502a7538102290+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5730 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_8976186fc1a14a570398+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5731 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5732 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_b1cd3aa0106a6921386d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5733 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5734 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_d053e5a2bdf45f8b64ef+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 5735 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_dc7fcef2e19ad392688a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5736 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_eadbca4e425481d07426+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5737 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f03f06a24a40a8dbe7e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5738 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 5739 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fd44e24d672157f0bc14+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaac8a0d1057bbcaaf5fc3dc85fa7699af59ddc07c1c26435640d6112acceff2
|
| 3 |
+
size 378947
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f24564b3a426253a6c9a58a73a9dedf7df1bb3c4ae4f9c3029d426f107c9c7b4
|
| 3 |
+
size 1926144
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_454cbfa5310f70cadd4c+617f6939/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7485e701ed5c6302ee1967ad86e7d57db68d08f66dca5344c6ee9783377ef51
|
| 3 |
+
size 2000256
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c69e83428c93f3412168937e1df467dd69d10ef46e9576f1fcb96de906e1a86
|
| 3 |
+
size 382812
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76209c24a04f98926f0cfe3fb63de3d27ef082dcdb30d4fec9ee2de7c5f18d37
|
| 3 |
+
size 3032064
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b55e4f26a3acec8be46+617f6939/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca1eb5edafa963dab3105d66b6044e615664da9c73f0a1a998f7ea8ae9ba07b7
|
| 3 |
+
size 3106169
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d430b5a352b7fbee3742c299062adfd10f1232b12cd924c52e979764a23e3f8
|
| 3 |
+
size 382348
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e24de20088f1002f739a76dff410698f254aab56f4dbdd8cdd5e85d2770eaa7c
|
| 3 |
+
size 2049024
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5122ddbc31f7f615af79+617f6939/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f5ca74126a5794dacf54743b1bbfd785f5eb4f45e34cb5f0280e864eeb78a0a
|
| 3 |
+
size 2123112
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e997c4e79e96cbcb32a125f0d039e049ef9db52977baa2ba2ff1c61cc44f8728
|
| 3 |
+
size 472299
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_8926597e9069871b192d+ad9e832d/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f6f412e048ddcb9a23fa9f92216ff3eeeb3eeeb78a0c4c5aada3c1962097b44
|
| 3 |
+
size 8203264
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a62f6d9bc9df3e2de19505d67e63c54515ab460fea5371f2c82e9e8d3c67322b
|
| 3 |
+
size 467299
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_9a73716a575cb3cb95df+ad9e832d/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b4934ab602c98cf48c3c0047ec8bdd1706da17f5f5e7e9542e04045911cfeb3
|
| 3 |
+
size 4363264
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb0b4a58bc33a450116485bdc97c80db38b76b6cf227f1a396467a16f8a24120
|
| 3 |
+
size 463666
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3fcf757bda6afe1679e+ad9e832d/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed1635305f59594bb96729057bfbd84fefd31db1795f3e1248314ef8e7b6e273
|
| 3 |
+
size 3953664
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=1 --vectorize-strided-dma ", "--lnc=1", "-O2", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b6e644ce4a647575cac6ee8a2b724e7eeddce2a15d81873dcb06658ee9b43c2
|
| 3 |
+
size 374121
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27888041c8321fa7821e9e2462fa24906b92a55403e477ad4270efcf29f80abf
|
| 3 |
+
size 1117184
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_eacd5f9beef23723ae4a+617f6939/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79f4497b6c14d756912c64bdf11a914fbeeea99ea086717893013947ddb15959
|
| 3 |
+
size 1209802
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a4ab156514485dbf5d74db2535e84136a07f17752c578320af5984844defbff
|
| 3 |
+
size 468446
|
neuronxcc-2.21.18209.0+043b1bf7/MODULE_f8b4f545fd646928f740+ad9e832d/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa6791c3341dd45d1cdf2724e98350fb7b1b08c8198db84fcd650b33c04117b0
|
| 3 |
+
size 22252544
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/0451dd4ad8e23b64aa0c.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/837dd017e8ae4530668c.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 8,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 16384,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 16384,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 16384,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 8
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/a36d353642d325c5397f.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 8,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 16384,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 16384,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 16384,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 8
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/meta-llama/Llama-3.1-8B-Instruct/cfdcd4c75495aba1f95d.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 14336,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 24 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 32,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0fef0800e360b8e2aa85.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 16384,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 16384,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 16384,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/3403115c880863fe4065.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 4,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"speculation_length": 0,
|
| 41 |
+
"start_rank_id": 0,
|
| 42 |
+
"target": "trn1",
|
| 43 |
+
"torch_dtype": "bfloat16",
|
| 44 |
+
"tp_degree": 2
|
| 45 |
+
},
|
| 46 |
+
"num_attention_heads": 32,
|
| 47 |
+
"num_hidden_layers": 16,
|
| 48 |
+
"num_key_value_heads": 8,
|
| 49 |
+
"pretraining_tp": 1,
|
| 50 |
+
"rms_norm_eps": 1e-05,
|
| 51 |
+
"rope_scaling": {
|
| 52 |
+
"factor": 32.0,
|
| 53 |
+
"high_freq_factor": 4.0,
|
| 54 |
+
"low_freq_factor": 1.0,
|
| 55 |
+
"original_max_position_embeddings": 8192,
|
| 56 |
+
"rope_type": "llama3"
|
| 57 |
+
},
|
| 58 |
+
"rope_theta": 500000.0,
|
| 59 |
+
"tie_word_embeddings": true,
|
| 60 |
+
"unsloth_fixed": true,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/98b3fa9a890ef0e1f391.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/a6f67c366d986ef4b636.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 16384,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 16384,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 16384,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/c87e429d1d13a13279bd.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/cfe0f6333cc973602102.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": false,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": false,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.3.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e75d2005d39261b7d1d8.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 4096,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 4096,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.3.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 4096,
|
| 40 |
+
"sequence_parallel_enabled": true,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn1",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_03563a07b466ff1a7583+24129607/model.done
ADDED
|
File without changes
|