| ## data structure |
|
|
| * imagenet 1k |
|
|
| ``` |
| data = { |
| 'input_sample_list': [ |
| { |
| 'data': |
| torch.rand(bs, 3, 224, 224, dtype=torch.float32), |
| 'invalid_mask': |
| None, |
| 'modality': |
| 'image', |
| 'data_type': 'input', |
| 'sample_info': { |
| 'id': list(range(bs)), |
| 'path': ['hah' for _ in range(bs)] |
| } |
| }, |
| ], |
| 'target_sample_list': [], |
| 'target_idx_list': [torch.randint(0, 1000, (bs, ))], |
| 'target_set_list': ['ImageNet22k'], |
| 'shared_target_sets': { |
| 'ImageNet22k': [{ |
| 'data': |
| torch.randint(0, 49411, (1000, 11)), |
| 'invalid_mask': |
| torch.zeros(1000, 11, dtype=torch.bool), |
| 'modality': |
| 'text', |
| 'data_type': 'target', |
| 'sample_info': { |
| 'distributed': True, |
| 'total_num': 1000, |
| } |
| }] |
| }, |
| 'task_info': { |
| 'task_name': 'imagenet', |
| 'task_type': 'image_classification', |
| 'dataset_name': 'ImageNet22k', |
| 'batchsize': None, |
| 'sampling_ratio': None |
| } |
| } |
| ``` |
| * mscoco caption |
| ``` |
| data = { |
| 'input_sample_list': [ |
| { |
| 'data': |
| torch.rand(bs, 3, 224, 224, dtype=torch.float32), |
| 'invalid_mask': |
| None, |
| 'modality': |
| 'image', |
| 'data_type': 'input', |
| 'sample_info': [{ |
| 'id': id, |
| 'path': 'hahah', |
| 'bs': bs |
| } for _ in range(bs)] |
| }, |
| { |
| 'data': |
| torch.randint(0, 49411, (bs, 31 * 2)), |
| 'invalid_mask': |
| torch.zeros(bs, 31 * 2, dtype=torch.bool), |
| 'modality': |
| 'text', |
| 'data_type': 'input', |
| 'sample_info': [{ |
| 'pe_index': |
| torch.cat([torch.arange(31), |
| torch.arange(31)], |
| dim=0) |
| } for _ in range(bs)] |
| }, |
| ], |
| 'target_sample_list': [], |
| 'target_idx_list': [torch.randint(0, 49411, (bs, 31))], |
| 'target_set_list': ['Vocab_Word'], |
| 'shared_target_sets': { |
| 'Vocab_Word': [{ |
| 'data': torch.randint(0, 49411, (49411, 2)), |
| 'invalid_mask': None, |
| 'modality': 'text', |
| 'data_type': 'target', |
| 'sample_info': { |
| 'distributed': True, |
| 'total_num': 49411, |
| } |
| }] |
| }, |
| 'task_info': { |
| 'task_name': 'mscoco_caption', |
| 'task_type': 'image_caption', |
| 'dataset_name': 'MSCOCO', |
| 'batchsize': None, |
| 'sampling_ratio': None |
| } |
| } |
| ``` |
|
|
|
|
| * text_mlm |
| ``` |
| data = { |
| 'input_sample_list': [ |
| { |
| 'data': torch.randint(0, 49411, (bs, 128)), |
| 'invalid_mask': torch.zeros(bs, 128, dtype=torch.bool), |
| 'modality': 'text', |
| 'data_type': 'input', |
| 'sample_info': { |
| 'seq_length': 128 |
| } |
| }, |
| ], |
| 'target_sample_list': [], |
| 'target_idx_list': [torch.randint(0, 49411, |
| (bs, 128))], # in practice, most entries are -1 |
| 'target_set_list': ['Vocab_Word'], |
| 'shared_target_sets': { |
| 'Vocab_Word': [{ |
| 'data': torch.randint(0, 49411, (49411, 2)), |
| 'invalid_mask': None, |
| 'modality': 'text', |
| 'data_type': 'target', |
| 'sample_info': { |
| 'distributed': True, |
| 'total_num': 49411, |
| } |
| }] |
| }, |
| 'task_info': { |
| 'task_name': 'bookswiki_pretrain', |
| 'task_type': 'text_mlm', |
| 'dataset_name': 'BooksWiki', |
| 'batchsize': None, |
| 'sampling_ratio': None |
| } |
| } |
| ``` |
| |
|
|
| * mscoco retrieval |
| ``` |
| data = { |
| 'input_sample_list': [ |
| { |
| 'data': |
| torch.rand(bs, 3, 224, 224, dtype=torch.float32), |
| 'invalid_mask': |
| None, |
| 'modality': |
| 'image', |
| 'sample_info': { |
| 'id': list(range(bs)), |
| 'path': ['hah' for _ in range(bs)] |
| } |
| }, |
| ], |
| 'target_sample_list': [ |
| { |
| 'data': torch.randint(0, 49411, (bs, 30)), |
| 'invalid_mask': torch.zeros(bs, 30, |
| dtype=torch.bool), |
| 'modality': 'text', |
| 'sample_info': {} |
| }, |
| ], |
| 'target_idx_list': [], |
| 'target_set_list': [], |
| 'shared_target_sets': { |
| 'ImageNet22k': [{ |
| 'data': |
| torch.randint(0, 49411, (1000, 11)), |
| 'invalid_mask': |
| torch.zeros(1000, 11, dtype=torch.bool), |
| 'modality': |
| 'text', |
| 'sample_info': { |
| 'distributed': True, |
| 'total_num': 1000, |
| } |
| }] |
| }, |
| 'task_info': { |
| 'task_name': 'mscoco_retrieve', |
| 'task_type': 'image_retrieval', |
| 'dataset_name': 'MSCOCO', |
| 'batchsize': None, |
| 'sampling_ratio': None |
| } |
| } |
| ``` |