ynuozhang
committed on
Commit
·
36d2203
1
Parent(s):
ec353cd
update data
Browse files
- training_data_cleaned/half_life/halflife_smiles_with_embeddings/dataset_dict.json +1 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/dataset_info.json +23 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/state.json +13 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/dataset_info.json +33 -0
- training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/state.json +13 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings/dataset_dict.json +1 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings/train/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings/train/dataset_info.json +27 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings/train/state.json +13 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/dataset_dict.json +1 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/dataset_info.json +63 -0
- training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/state.json +13 -0
- training_data_cleaned/toxicity/smiles_toxicity.csv +3 -0
- training_data_cleaned/toxicity/tox_meta_with_split.csv +3 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/dataset_dict.json +1 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/dataset_info.json +45 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/state.json +13 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/data-00000-of-00001.arrow +3 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/dataset_info.json +45 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/state.json +13 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
- training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/train/data-00000-of-00005.arrow +3 -0
- training_data_cleaned/toxicity/tox_train.csv +3 -0
- training_data_cleaned/toxicity/tox_val.csv +3 -0
training_data_cleaned/half_life/halflife_smiles_with_embeddings/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train"]}
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e63b59b4a3d32c0028b6f6a6158afa96a54df3c0775a2aa30ed0c062cde8d996
|
| 3 |
+
size 1627872
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/dataset_info.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"sequence": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"labels": {
|
| 10 |
+
"dtype": "float64",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
},
|
| 13 |
+
"embedding": {
|
| 14 |
+
"feature": {
|
| 15 |
+
"dtype": "float64",
|
| 16 |
+
"_type": "Value"
|
| 17 |
+
},
|
| 18 |
+
"_type": "List"
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"homepage": "",
|
| 22 |
+
"license": ""
|
| 23 |
+
}
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "3ec5fd4c3c608984",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train"]}
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:169b2180009e2b84e9b76a8e2ef308a018c9ff0eea0b51653b3a51b08437322e
|
| 3 |
+
size 269124304
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/dataset_info.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"sequence": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"labels": {
|
| 10 |
+
"dtype": "float64",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
},
|
| 13 |
+
"embedding": {
|
| 14 |
+
"feature": {
|
| 15 |
+
"feature": {
|
| 16 |
+
"dtype": "float64",
|
| 17 |
+
"_type": "Value"
|
| 18 |
+
},
|
| 19 |
+
"_type": "List"
|
| 20 |
+
},
|
| 21 |
+
"_type": "List"
|
| 22 |
+
},
|
| 23 |
+
"attention_mask": {
|
| 24 |
+
"feature": {
|
| 25 |
+
"dtype": "int8",
|
| 26 |
+
"_type": "Value"
|
| 27 |
+
},
|
| 28 |
+
"_type": "List"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"homepage": "",
|
| 32 |
+
"license": ""
|
| 33 |
+
}
|
training_data_cleaned/half_life/halflife_smiles_with_embeddings_unpooled/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "19cc99c2b57e1ac1",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train"]}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f49e6e2351f768ff287446a9abbe83578dee350b7b313a03911af5bdf657bdd0
|
| 3 |
+
size 676432
|
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/dataset_info.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"sequence": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"embedding": {
|
| 10 |
+
"feature": {
|
| 11 |
+
"dtype": "float32",
|
| 12 |
+
"_type": "Value"
|
| 13 |
+
},
|
| 14 |
+
"_type": "List"
|
| 15 |
+
},
|
| 16 |
+
"label": {
|
| 17 |
+
"dtype": "float64",
|
| 18 |
+
"_type": "Value"
|
| 19 |
+
},
|
| 20 |
+
"log_label": {
|
| 21 |
+
"dtype": "float64",
|
| 22 |
+
"_type": "Value"
|
| 23 |
+
}
|
| 24 |
+
},
|
| 25 |
+
"homepage": "",
|
| 26 |
+
"license": ""
|
| 27 |
+
}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "0aca4414c7da9c1c",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train"]}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40f78a8dcbdce7813efb495952d302ff6a907aa91bdd719b8a4fa74f75ab3997
|
| 3 |
+
size 17139368
|
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/dataset_info.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"builder_name": "generator",
|
| 3 |
+
"citation": "",
|
| 4 |
+
"config_name": "default",
|
| 5 |
+
"dataset_name": "generator",
|
| 6 |
+
"dataset_size": 17137776,
|
| 7 |
+
"description": "",
|
| 8 |
+
"download_checksums": {},
|
| 9 |
+
"download_size": 0,
|
| 10 |
+
"features": {
|
| 11 |
+
"sequence": {
|
| 12 |
+
"dtype": "string",
|
| 13 |
+
"_type": "Value"
|
| 14 |
+
},
|
| 15 |
+
"label": {
|
| 16 |
+
"dtype": "float16",
|
| 17 |
+
"_type": "Value"
|
| 18 |
+
},
|
| 19 |
+
"log_label": {
|
| 20 |
+
"dtype": "float16",
|
| 21 |
+
"_type": "Value"
|
| 22 |
+
},
|
| 23 |
+
"embedding": {
|
| 24 |
+
"feature": {
|
| 25 |
+
"feature": {
|
| 26 |
+
"dtype": "float16",
|
| 27 |
+
"_type": "Value"
|
| 28 |
+
},
|
| 29 |
+
"length": 1280,
|
| 30 |
+
"_type": "List"
|
| 31 |
+
},
|
| 32 |
+
"_type": "List"
|
| 33 |
+
},
|
| 34 |
+
"attention_mask": {
|
| 35 |
+
"feature": {
|
| 36 |
+
"dtype": "int8",
|
| 37 |
+
"_type": "Value"
|
| 38 |
+
},
|
| 39 |
+
"_type": "List"
|
| 40 |
+
},
|
| 41 |
+
"length": {
|
| 42 |
+
"dtype": "int64",
|
| 43 |
+
"_type": "Value"
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"homepage": "",
|
| 47 |
+
"license": "",
|
| 48 |
+
"size_in_bytes": 17137776,
|
| 49 |
+
"splits": {
|
| 50 |
+
"train": {
|
| 51 |
+
"name": "train",
|
| 52 |
+
"num_bytes": 17137776,
|
| 53 |
+
"num_examples": 130,
|
| 54 |
+
"dataset_name": "generator"
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"version": {
|
| 58 |
+
"version_str": "0.0.0",
|
| 59 |
+
"major": 0,
|
| 60 |
+
"minor": 0,
|
| 61 |
+
"patch": 0
|
| 62 |
+
}
|
| 63 |
+
}
|
training_data_cleaned/half_life/halflife_wt_with_embeddings_unpooled/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "2e46b50ca91e8b43",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": "train"
|
| 13 |
+
}
|
training_data_cleaned/toxicity/smiles_toxicity.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7364d5251a50771186d312cc2644c0802df780b3b9b5f151a3ba4f0b4136949f
|
| 3 |
+
size 4558618
|
training_data_cleaned/toxicity/tox_meta_with_split.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:757730999cf644f543b57b01e37b0352fadc36f4f1cc0a4d6694f33559c71683
|
| 3 |
+
size 9181995
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train", "val"]}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f38ff734427d1daa15c2ae885064355a7aef80f1726de8c37a504b324cf3650
|
| 3 |
+
size 31051104
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/dataset_info.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"builder_name": "generator",
|
| 3 |
+
"citation": "",
|
| 4 |
+
"config_name": "default",
|
| 5 |
+
"dataset_name": "generator",
|
| 6 |
+
"dataset_size": 31047810,
|
| 7 |
+
"description": "",
|
| 8 |
+
"download_checksums": {},
|
| 9 |
+
"download_size": 0,
|
| 10 |
+
"features": {
|
| 11 |
+
"sequence": {
|
| 12 |
+
"dtype": "string",
|
| 13 |
+
"_type": "Value"
|
| 14 |
+
},
|
| 15 |
+
"label": {
|
| 16 |
+
"dtype": "int64",
|
| 17 |
+
"_type": "Value"
|
| 18 |
+
},
|
| 19 |
+
"embedding": {
|
| 20 |
+
"feature": {
|
| 21 |
+
"dtype": "float32",
|
| 22 |
+
"_type": "Value"
|
| 23 |
+
},
|
| 24 |
+
"length": 768,
|
| 25 |
+
"_type": "List"
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"homepage": "",
|
| 29 |
+
"license": "",
|
| 30 |
+
"size_in_bytes": 31047810,
|
| 31 |
+
"splits": {
|
| 32 |
+
"train": {
|
| 33 |
+
"name": "train",
|
| 34 |
+
"num_bytes": 31047810,
|
| 35 |
+
"num_examples": 8838,
|
| 36 |
+
"dataset_name": "generator"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"version": {
|
| 40 |
+
"version_str": "0.0.0",
|
| 41 |
+
"major": 0,
|
| 42 |
+
"minor": 0,
|
| 43 |
+
"patch": 0
|
| 44 |
+
}
|
| 45 |
+
}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "5a85a8b6cccaa5e4",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": "train"
|
| 13 |
+
}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8757b7d66dfec9588f703a2502199e84b5623612c644b966b67575c6dcc28d67
|
| 3 |
+
size 7514192
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/dataset_info.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"builder_name": "generator",
|
| 3 |
+
"citation": "",
|
| 4 |
+
"config_name": "default",
|
| 5 |
+
"dataset_name": "generator",
|
| 6 |
+
"dataset_size": 7512711,
|
| 7 |
+
"description": "",
|
| 8 |
+
"download_checksums": {},
|
| 9 |
+
"download_size": 0,
|
| 10 |
+
"features": {
|
| 11 |
+
"sequence": {
|
| 12 |
+
"dtype": "string",
|
| 13 |
+
"_type": "Value"
|
| 14 |
+
},
|
| 15 |
+
"label": {
|
| 16 |
+
"dtype": "int64",
|
| 17 |
+
"_type": "Value"
|
| 18 |
+
},
|
| 19 |
+
"embedding": {
|
| 20 |
+
"feature": {
|
| 21 |
+
"dtype": "float32",
|
| 22 |
+
"_type": "Value"
|
| 23 |
+
},
|
| 24 |
+
"length": 768,
|
| 25 |
+
"_type": "List"
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"homepage": "",
|
| 29 |
+
"license": "",
|
| 30 |
+
"size_in_bytes": 7512711,
|
| 31 |
+
"splits": {
|
| 32 |
+
"train": {
|
| 33 |
+
"name": "train",
|
| 34 |
+
"num_bytes": 7512711,
|
| 35 |
+
"num_examples": 2198,
|
| 36 |
+
"dataset_name": "generator"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"version": {
|
| 40 |
+
"version_str": "0.0.0",
|
| 41 |
+
"major": 0,
|
| 42 |
+
"minor": 0,
|
| 43 |
+
"patch": 0
|
| 44 |
+
}
|
| 45 |
+
}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings/val/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "5cc160841c203048",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": "train"
|
| 13 |
+
}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train", "val"]}
|
training_data_cleaned/toxicity/tox_smiles_with_embeddings_unpooled/train/data-00000-of-00005.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b20ec28e74e7eb0c5b768cc86f657141c35377d4a8d7bafe4e8db8d81ca90e6
|
| 3 |
+
size 355820424
|
training_data_cleaned/toxicity/tox_train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cc9191faffe5d13e80a1f486c08378110e84ff51d4c92eb01e929b93cbeff5d
|
| 3 |
+
size 7690803
|
training_data_cleaned/toxicity/tox_val.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a44b940dbcc6286ea84654878fa326376034648a05ca9498b623413fe21e8f5
|
| 3 |
+
size 1491235
|