BaoLocTown committed on
Commit
0ca7762
·
verified ·
1 Parent(s): dded981

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +91 -0
  2. count.json +43 -43
  3. count_test.json +43 -43
  4. count_vietmed.json +12 -12
  5. count_vlsp_2021.json +10 -10
README.md CHANGED
@@ -16,6 +16,97 @@ tags:
16
  - encoder
17
  - entity recognition
18
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # About
20
 
21
  GLiNER is a Named Entity Recognition (NER) model capable of identifying any entity type using a bidirectional transformer encoder (BERT-like). It provides a practical alternative to traditional NER models, which are limited to predefined entities, and Large Language Models (LLMs) that, despite their flexibility, are too costly and large for resource-constrained scenarios.
 
16
  - encoder
17
  - entity recognition
18
  ---
19
+ # Entity Types Classification
20
+
21
+ ## Personal Information
22
+ - Date of birth
23
+ - Age
24
+ - Gender
25
+ - Last name
26
+ - Occupation
27
+ - Education level
28
+ - Phone number
29
+ - Email
30
+ - Street address
31
+ - City
32
+ - Country
33
+ - Postcode
34
+ - User name
35
+ - Password
36
+ - Tax ID
37
+ - License plate
38
+ - CVV
39
+ - Bank routing number
40
+ - Account number
41
+ - SWIFT BIC
42
+ - Biometric identifier
43
+ - Device identifier
44
+ - Location
45
+
46
+ ## Financial Information
47
+ - Account number
48
+ - Bank routing number
49
+ - SWIFT BIC
50
+ - CVV
51
+ - Tax ID
52
+ - API key
53
+
54
+ ## Health and Medical Information
55
+ - Blood type
56
+ - Biometric identifier
57
+ - Organ
58
+ - Diseases symptom
59
+ - Diagnostics
60
+ - Preventive medicine
61
+ - Treatment
62
+ - Surgery
63
+ - Drug chemical
64
+ - Medical device technique
65
+ - Personal care
66
+
67
+ ## Online and Web-related Information
68
+ - URL
69
+ - IP address
70
+ - Email
71
+ - User name
72
+ - API key
73
+
74
+ ## Professional Information
75
+ - Occupation
76
+ - Skill
77
+ - Organization
78
+ - Company name
79
+
80
+ ## Location Information
81
+ - City
82
+ - Country
83
+ - Postcode
84
+ - Street address
85
+ - Location
86
+
87
+ ## Time-Related Information
88
+ - Date
89
+ - Date time
90
+
91
+ ## Miscellaneous
92
+ - Event
93
+ - Miscellaneous
94
+
95
+ ## Product and Goods Information
96
+ - Product
97
+ - Quantity
98
+ - Food drink
99
+ - Transportation
100
+
101
+ ## Identifiers
102
+ - Device identifier
103
+ - Biometric identifier
104
+ - User name
105
+ - Email
106
+ - Phone number
107
+ - URL
108
+ - License plate
109
+
110
  # About
111
 
112
  GLiNER is a Named Entity Recognition (NER) model capable of identifying any entity type using a bidirectional transformer encoder (BERT-like). It provides a practical alternative to traditional NER models, which are limited to predefined entities, and Large Language Models (LLMs) that, despite their flexibility, are too costly and large for resource-constrained scenarios.
count.json CHANGED
@@ -1,53 +1,53 @@
1
  {
2
- "diagnostics": 1302,
3
- "date": 25179,
4
- "blood type": 3101,
5
- "organization": 37540,
6
- "occupation": 2860,
7
- "date time": 10263,
8
- "last name": 6257,
9
- "meddevicetechnique": 1460,
10
- "datetime": 2996,
11
- "license plate": 878,
12
- "quantity": 18083,
13
- "cvv": 1665,
14
- "fooddrink": 4882,
15
  "biometric identifier": 114,
16
- "swift bic": 333,
17
- "tax id": 3130,
18
- "personalcare": 2356,
19
- "location": 16087,
20
- "phone number": 22377,
21
- "education level": 2095,
22
- "account number": 7455,
23
- "drugchemical": 9503,
24
  "country": 15005,
25
- "street address": 15208,
26
- "preventivemed": 1529,
 
27
  "bank routing number": 1549,
28
- "api key": 964,
29
- "age": 3524,
30
- "event": 3192,
31
- "password": 958,
32
- "diseasesymtom": 11770,
33
- "gender": 2245,
34
- "city": 28410,
35
  "miscellaneous": 1766,
36
- "product": 10464,
37
- "organ": 2263,
 
 
 
 
 
 
38
  "ipv4": 589,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  "email": 17576,
40
- "skill": 2760,
41
- "user name": 819,
42
  "surgery": 3939,
 
 
 
 
 
 
 
 
43
  "persontype": 6482,
44
- "person": 32677,
45
- "treatment": 3081,
46
- "unitcalibrator": 1261,
47
- "transportation": 244,
48
- "date of birth": 12824,
49
- "postcode": 1878,
50
- "device identifier": 1921,
51
- "company name": 29040,
52
- "url": 2108
53
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "biometric identifier": 114,
3
+ "date of birth": 12824,
 
 
 
 
 
 
 
4
  "country": 15005,
5
+ "unitcalibrator": 1261,
6
+ "datetime": 2996,
7
+ "device identifier": 1921,
8
  "bank routing number": 1549,
9
+ "postcode": 1878,
10
+ "preventivemed": 1529,
11
+ "url": 2108,
12
+ "tax id": 3130,
13
+ "license plate": 878,
 
 
14
  "miscellaneous": 1766,
15
+ "account number": 7455,
16
+ "password": 958,
17
+ "fooddrink": 4882,
18
+ "skill": 2760,
19
+ "date time": 10263,
20
+ "company name": 29040,
21
+ "last name": 6257,
22
+ "blood type": 3101,
23
  "ipv4": 589,
24
+ "event": 3192,
25
+ "drugchemical": 9503,
26
+ "transportation": 244,
27
+ "education level": 2095,
28
+ "location": 16087,
29
+ "organization": 37540,
30
+ "diagnostics": 1302,
31
+ "organ": 2263,
32
+ "city": 28410,
33
+ "swift bic": 333,
34
+ "quantity": 18083,
35
+ "age": 3524,
36
+ "phone number": 22377,
37
+ "meddevicetechnique": 1460,
38
  "email": 17576,
39
+ "product": 10464,
40
+ "person": 32677,
41
  "surgery": 3939,
42
+ "occupation": 2860,
43
+ "gender": 2245,
44
+ "cvv": 1665,
45
+ "personalcare": 2356,
46
+ "date": 25179,
47
+ "user name": 819,
48
+ "api key": 964,
49
+ "diseasesymtom": 11770,
50
  "persontype": 6482,
51
+ "street address": 15208,
52
+ "treatment": 3081
 
 
 
 
 
 
 
53
  }
count_test.json CHANGED
@@ -1,53 +1,53 @@
1
  {
2
- "diagnostics": 127,
3
- "date": 1312,
4
- "blood type": 144,
5
- "organization": 3150,
6
- "occupation": 271,
7
- "date time": 1480,
8
- "last name": 322,
9
- "meddevicetechnique": 119,
10
- "datetime": 292,
11
- "license plate": 31,
12
- "quantity": 1513,
13
- "cvv": 93,
14
- "fooddrink": 284,
15
  "biometric identifier": 7,
16
- "swift bic": 14,
17
- "tax id": 197,
18
- "personalcare": 199,
19
- "location": 1691,
20
- "phone number": 1178,
21
- "education level": 108,
22
- "account number": 387,
23
- "drugchemical": 707,
24
  "country": 737,
25
- "street address": 780,
26
- "preventivemed": 139,
 
27
  "bank routing number": 67,
28
- "api key": 52,
29
- "age": 290,
30
- "event": 265,
31
- "password": 52,
32
- "diseasesymtom": 1199,
33
- "gender": 174,
34
- "city": 1475,
35
  "miscellaneous": 236,
36
- "product": 1064,
37
- "organ": 492,
 
 
 
 
 
 
38
  "ipv4": 47,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  "email": 899,
40
- "skill": 185,
41
- "user name": 33,
42
  "surgery": 221,
 
 
 
 
 
 
 
 
43
  "persontype": 1034,
44
- "person": 3091,
45
- "treatment": 288,
46
- "unitcalibrator": 243,
47
- "transportation": 22,
48
- "date of birth": 645,
49
- "postcode": 119,
50
- "device identifier": 109,
51
- "company name": 1440,
52
- "url": 109
53
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "biometric identifier": 7,
3
+ "date of birth": 645,
 
 
 
 
 
 
 
4
  "country": 737,
5
+ "unitcalibrator": 243,
6
+ "datetime": 292,
7
+ "device identifier": 109,
8
  "bank routing number": 67,
9
+ "postcode": 119,
10
+ "preventivemed": 139,
11
+ "url": 109,
12
+ "tax id": 197,
13
+ "license plate": 31,
 
 
14
  "miscellaneous": 236,
15
+ "account number": 387,
16
+ "password": 52,
17
+ "fooddrink": 284,
18
+ "skill": 185,
19
+ "date time": 1480,
20
+ "company name": 1440,
21
+ "last name": 322,
22
+ "blood type": 144,
23
  "ipv4": 47,
24
+ "event": 265,
25
+ "drugchemical": 707,
26
+ "transportation": 22,
27
+ "education level": 108,
28
+ "location": 1691,
29
+ "organization": 3150,
30
+ "diagnostics": 127,
31
+ "organ": 492,
32
+ "city": 1475,
33
+ "swift bic": 14,
34
+ "quantity": 1513,
35
+ "age": 290,
36
+ "phone number": 1178,
37
+ "meddevicetechnique": 119,
38
  "email": 899,
39
+ "product": 1064,
40
+ "person": 3091,
41
  "surgery": 221,
42
+ "occupation": 271,
43
+ "gender": 174,
44
+ "cvv": 93,
45
+ "personalcare": 199,
46
+ "date": 1312,
47
+ "user name": 33,
48
+ "api key": 52,
49
+ "diseasesymtom": 1199,
50
  "persontype": 1034,
51
+ "street address": 780,
52
+ "treatment": 288
 
 
 
 
 
 
 
53
  }
count_vietmed.json CHANGED
@@ -1,20 +1,20 @@
1
  {
 
 
 
 
2
  "diagnostics": 373,
 
3
  "surgery": 200,
4
- "diseasesymtom": 2966,
5
- "treatment": 740,
6
  "fooddrink": 257,
 
7
  "unitcalibrator": 822,
8
- "transportation": 5,
9
- "gender": 210,
10
- "personalcare": 383,
11
- "location": 292,
12
- "organization": 19,
13
- "occupation": 545,
14
- "drugchemical": 1127,
15
  "meddevicetechnique": 327,
 
16
  "datetime": 695,
17
- "preventivemed": 343,
18
- "organ": 1972,
19
- "age": 455
20
  }
 
1
  {
2
+ "occupation": 545,
3
+ "location": 292,
4
+ "preventivemed": 343,
5
+ "gender": 210,
6
  "diagnostics": 373,
7
+ "personalcare": 383,
8
  "surgery": 200,
9
+ "organ": 1972,
10
+ "organization": 19,
11
  "fooddrink": 257,
12
+ "age": 455,
13
  "unitcalibrator": 822,
 
 
 
 
 
 
 
14
  "meddevicetechnique": 327,
15
+ "diseasesymtom": 2966,
16
  "datetime": 695,
17
+ "drugchemical": 1127,
18
+ "transportation": 5,
19
+ "treatment": 740
20
  }
count_vlsp_2021.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "quantity": 5048,
3
- "persontype": 5304,
4
- "person": 9762,
5
- "skill": 79,
6
- "organization": 9526,
7
- "phone number": 258,
8
  "location": 9270,
 
 
9
  "miscellaneous": 1480,
 
 
10
  "date time": 7050,
11
- "product": 3358,
12
- "street address": 646,
13
  "ipv4": 66,
14
  "email": 96,
15
- "url": 350,
16
- "event": 1362
 
 
 
17
  }
 
1
  {
 
 
 
 
 
 
2
  "location": 9270,
3
+ "organization": 9526,
4
+ "url": 350,
5
  "miscellaneous": 1480,
6
+ "quantity": 5048,
7
+ "skill": 79,
8
  "date time": 7050,
9
+ "phone number": 258,
 
10
  "ipv4": 66,
11
  "email": 96,
12
+ "persontype": 5304,
13
+ "event": 1362,
14
+ "product": 3358,
15
+ "street address": 646,
16
+ "person": 9762
17
  }