Joblib
ynuozhang committed on
Commit
36d2203
·
1 Parent(s): ec353cd

update data

Browse files
Files changed (29) hide show
  1. training_data_cleaned/half_life/halflife_smiles_with_embeddings/dataset_dict.json +1 -0
  2. training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/data-00000-of-00001.arrow +3 -0
  3. training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/dataset_info.json +23 -0
  4. training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/state.json +13 -0
  5. training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
  6. training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/data-00000-of-00001.arrow +3 -0
  7. training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/dataset_info.json +33 -0
  8. training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/state.json +13 -0
  9. training_data_cleaned/half_life/halflife_wt_with_embeddings/dataset_dict.json +1 -0
  10. training_data_cleaned/half_life/halflife_wt_with_embeddings/train/data-00000-of-00001.arrow +3 -0
  11. training_data_cleaned/half_life/halflife_wt_with_embeddings/train/dataset_info.json +27 -0
  12. training_data_cleaned/half_life/halflife_wt_with_embeddings/train/state.json +13 -0
  13. training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/dataset_dict.json +1 -0
  14. training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/data-00000-of-00001.arrow +3 -0
  15. training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/dataset_info.json +63 -0
  16. training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/state.json +13 -0
  17. training_data_cleaned/toxicity/smiles_toxicity.csv +3 -0
  18. training_data_cleaned/toxicity/tox_meta_with_split.csv +3 -0
  19. training_data_cleaned/toxicity/tox_smiles_with_embeddings/dataset_dict.json +1 -0
  20. training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/data-00000-of-00001.arrow +3 -0
  21. training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/dataset_info.json +45 -0
  22. training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/state.json +13 -0
  23. training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/data-00000-of-00001.arrow +3 -0
  24. training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/dataset_info.json +45 -0
  25. training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/state.json +13 -0
  26. training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
  27. training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/train/data-00000-of-00005.arrow +3 -0
  28. training_data_cleaned/toxicity/tox_train.csv +3 -0
  29. training_data_cleaned/toxicity/tox_val.csv +3 -0
training_data_cleaned/half_life/halflife_smiles_with_embeddings/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train"]}
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e63b59b4a3d32c0028b6f6a6158afa96a54df3c0775a2aa30ed0c062cde8d996
3
+ size 1627872
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/dataset_info.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "labels": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "dtype": "float64",
16
+ "_type": "Value"
17
+ },
18
+ "_type": "List"
19
+ }
20
+ },
21
+ "homepage": "",
22
+ "license": ""
23
+ }
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "3ec5fd4c3c608984",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train"]}
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169b2180009e2b84e9b76a8e2ef308a018c9ff0eea0b51653b3a51b08437322e
3
+ size 269124304
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/dataset_info.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "labels": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "feature": {
16
+ "dtype": "float64",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ }
30
+ },
31
+ "homepage": "",
32
+ "license": ""
33
+ }
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "19cc99c2b57e1ac1",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
training_data_cleaned/half_life/halflife_wt_with_embeddings/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train"]}
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f49e6e2351f768ff287446a9abbe83578dee350b7b313a03911af5bdf657bdd0
3
+ size 676432
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/dataset_info.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "embedding": {
10
+ "feature": {
11
+ "dtype": "float32",
12
+ "_type": "Value"
13
+ },
14
+ "_type": "List"
15
+ },
16
+ "label": {
17
+ "dtype": "float64",
18
+ "_type": "Value"
19
+ },
20
+ "log_label": {
21
+ "dtype": "float64",
22
+ "_type": "Value"
23
+ }
24
+ },
25
+ "homepage": "",
26
+ "license": ""
27
+ }
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0aca4414c7da9c1c",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train"]}
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f78a8dcbdce7813efb495952d302ff6a907aa91bdd719b8a4fa74f75ab3997
3
+ size 17139368
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/dataset_info.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "generator",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "generator",
6
+ "dataset_size": 17137776,
7
+ "description": "",
8
+ "download_checksums": {},
9
+ "download_size": 0,
10
+ "features": {
11
+ "sequence": {
12
+ "dtype": "string",
13
+ "_type": "Value"
14
+ },
15
+ "label": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "log_label": {
20
+ "dtype": "float16",
21
+ "_type": "Value"
22
+ },
23
+ "embedding": {
24
+ "feature": {
25
+ "feature": {
26
+ "dtype": "float16",
27
+ "_type": "Value"
28
+ },
29
+ "length": 1280,
30
+ "_type": "List"
31
+ },
32
+ "_type": "List"
33
+ },
34
+ "attention_mask": {
35
+ "feature": {
36
+ "dtype": "int8",
37
+ "_type": "Value"
38
+ },
39
+ "_type": "List"
40
+ },
41
+ "length": {
42
+ "dtype": "int64",
43
+ "_type": "Value"
44
+ }
45
+ },
46
+ "homepage": "",
47
+ "license": "",
48
+ "size_in_bytes": 17137776,
49
+ "splits": {
50
+ "train": {
51
+ "name": "train",
52
+ "num_bytes": 17137776,
53
+ "num_examples": 130,
54
+ "dataset_name": "generator"
55
+ }
56
+ },
57
+ "version": {
58
+ "version_str": "0.0.0",
59
+ "major": 0,
60
+ "minor": 0,
61
+ "patch": 0
62
+ }
63
+ }
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "2e46b50ca91e8b43",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
training_data_cleaned/toxicity/smiles_toxicity.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7364d5251a50771186d312cc2644c0802df780b3b9b5f151a3ba4f0b4136949f
3
+ size 4558618
training_data_cleaned/toxicity/tox_meta_with_split.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:757730999cf644f543b57b01e37b0352fadc36f4f1cc0a4d6694f33559c71683
3
+ size 9181995
training_data_cleaned/toxicity/tox_smiles_with_embeddings/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "val"]}
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f38ff734427d1daa15c2ae885064355a7aef80f1726de8c37a504b324cf3650
3
+ size 31051104
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/dataset_info.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "generator",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "generator",
6
+ "dataset_size": 31047810,
7
+ "description": "",
8
+ "download_checksums": {},
9
+ "download_size": 0,
10
+ "features": {
11
+ "sequence": {
12
+ "dtype": "string",
13
+ "_type": "Value"
14
+ },
15
+ "label": {
16
+ "dtype": "int64",
17
+ "_type": "Value"
18
+ },
19
+ "embedding": {
20
+ "feature": {
21
+ "dtype": "float32",
22
+ "_type": "Value"
23
+ },
24
+ "length": 768,
25
+ "_type": "List"
26
+ }
27
+ },
28
+ "homepage": "",
29
+ "license": "",
30
+ "size_in_bytes": 31047810,
31
+ "splits": {
32
+ "train": {
33
+ "name": "train",
34
+ "num_bytes": 31047810,
35
+ "num_examples": 8838,
36
+ "dataset_name": "generator"
37
+ }
38
+ },
39
+ "version": {
40
+ "version_str": "0.0.0",
41
+ "major": 0,
42
+ "minor": 0,
43
+ "patch": 0
44
+ }
45
+ }
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "5a85a8b6cccaa5e4",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8757b7d66dfec9588f703a2502199e84b5623612c644b966b67575c6dcc28d67
3
+ size 7514192
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/dataset_info.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "generator",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "generator",
6
+ "dataset_size": 7512711,
7
+ "description": "",
8
+ "download_checksums": {},
9
+ "download_size": 0,
10
+ "features": {
11
+ "sequence": {
12
+ "dtype": "string",
13
+ "_type": "Value"
14
+ },
15
+ "label": {
16
+ "dtype": "int64",
17
+ "_type": "Value"
18
+ },
19
+ "embedding": {
20
+ "feature": {
21
+ "dtype": "float32",
22
+ "_type": "Value"
23
+ },
24
+ "length": 768,
25
+ "_type": "List"
26
+ }
27
+ },
28
+ "homepage": "",
29
+ "license": "",
30
+ "size_in_bytes": 7512711,
31
+ "splits": {
32
+ "train": {
33
+ "name": "train",
34
+ "num_bytes": 7512711,
35
+ "num_examples": 2198,
36
+ "dataset_name": "generator"
37
+ }
38
+ },
39
+ "version": {
40
+ "version_str": "0.0.0",
41
+ "major": 0,
42
+ "minor": 0,
43
+ "patch": 0
44
+ }
45
+ }
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "5cc160841c203048",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "val"]}
training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/train/data-00000-of-00005.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b20ec28e74e7eb0c5b768cc86f657141c35377d4a8d7bafe4e8db8d81ca90e6
3
+ size 355820424
training_data_cleaned/toxicity/tox_train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc9191faffe5d13e80a1f486c08378110e84ff51d4c92eb01e929b93cbeff5d
3
+ size 7690803
training_data_cleaned/toxicity/tox_val.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a44b940dbcc6286ea84654878fa326376034648a05ca9498b623413fe21e8f5
3
+ size 1491235