asar coverage - build #


src/asar/
Coverage:
low: ≥ 0%
medium: ≥ 75.0%
high: ≥ 90.0%
Lines:
60 of 107, 0 excluded
56.1%
Functions:
5 of 8, 0 excluded
62.5%
Branches:
87 of 227, 0 excluded
38.3%

unicode.cpp
Line Branch Exec Source
1 #include "unicode.h"
2
// Decodes a single UTF-8 encoded codepoint starting at inp.
//
// On success, stores the decoded codepoint in *codepoint and returns the
// number of bytes consumed (1 to 4). On malformed input (invalid start byte,
// missing continuation byte, overlong encoding, UTF-16 surrogate, value above
// U+10FFFF, or a stray continuation byte directly after the sequence), stores
// -1 in *codepoint and returns 0.
//
// NOTE: for multi-byte sequences the byte directly following the sequence is
// inspected as well, so inp must remain readable one byte past the sequence
// (a NUL-terminated buffer satisfies this).
size_t utf8_val(int* codepoint, const char* inp) {
	const unsigned char lead = static_cast<unsigned char>(inp[0]);

	if (lead < 0x80) {
		// plain ascii
		*codepoint = lead;
		return 1u;
	}

	// RPG Hacker: Byte sequences starting with 0xC0 or 0xC1 are invalid.
	// So are byte sequences starting with anything >= 0xF5.
	// And anything below 0xC0 indicates a follow-up byte and should never be at the start of a sequence.
	if (lead <= 0xC1 || lead >= 0xF5) {
		*codepoint = -1;
		return 0u;
	}

	// 1, 2 or 3 continuation bytes
	const int cont_byte_count = (lead >= 0xF0) ? 3 : (lead >= 0xE0) ? 2 : 1;

	// bit hack to extract the significant bits from the start byte
	// (5 bits for 2-byte, 4 bits for 3-byte, 3 bits for 4-byte sequences)
	int val = lead & ((1 << (6 - cont_byte_count)) - 1);

	for (int i = 1; i <= cont_byte_count; i++) {
		const unsigned char next = static_cast<unsigned char>(inp[i]);
		if ((next & 0xC0) != 0x80) {
			// expected a continuation byte, got something else
			*codepoint = -1;
			return 0u;
		}
		val = (val << 6) | (next & 0x3F);
	}

	// Smallest codepoint that legitimately needs N continuation bytes;
	// anything below that is an overlong encoding. Index 0 is unused.
	static const int min_val_for_cont_bytes[] = { 0x0, 0x80, 0x800, 0x10000 };

	const bool is_overlong = val < min_val_for_cont_bytes[cont_byte_count];
	const bool is_out_of_range = val > 0x10FFFF;
	const bool is_surrogate = (val >= 0xD800 && val <= 0xDFFF);
	// too many cont. bytes: the byte right after the sequence is another continuation byte
	const bool has_extra_cont_byte =
		(static_cast<unsigned char>(inp[cont_byte_count + 1]) & 0xC0) == 0x80;

	if (has_extra_cont_byte || is_out_of_range || is_overlong || is_surrogate) {
		*codepoint = -1;
		return 0u;
	}

	*codepoint = val;
	return static_cast<size_t>(1 + cont_byte_count);
}
52
// Encodes a single Unicode codepoint as UTF-8.
//
// *out is always reset to an empty string first. On success, *out receives
// the 1 to 4 byte encoding and true is returned. Returns false (leaving *out
// empty) when the codepoint cannot be represented in well-formed UTF-8:
// values above U+10FFFF and UTF-16 surrogates (U+D800..U+DFFF).
bool codepoint_to_utf8(string* out, unsigned int codepoint) {
	*out = "";

	// Surrogates are not valid scalar values; utf8_val() rejects them when
	// decoding, so they must not be produced when encoding either.
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF) return false;

	if (codepoint < 0x80) {
		// 1 byte: 0xxxxxxx
		*out += static_cast<unsigned char>(codepoint);
		return true;
	}

	if (codepoint < 0x800) {
		// 2 bytes: 110xxxxx 10xxxxxx
		*out += static_cast<unsigned char>(0xc0 | (codepoint >> 6));
		*out += static_cast<unsigned char>(0x80 | (codepoint & 0x3f));
		return true;
	}

	if (codepoint < 0x10000) {
		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		*out += static_cast<unsigned char>(0xe0 | (codepoint >> 12));
		*out += static_cast<unsigned char>(0x80 | ((codepoint >> 6) & 0x3f));
		*out += static_cast<unsigned char>(0x80 | (codepoint & 0x3f));
		return true;
	}

	if (codepoint < 0x110000) {
		// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		*out += static_cast<unsigned char>(0xf0 | (codepoint >> 18));
		*out += static_cast<unsigned char>(0x80 | ((codepoint >> 12) & 0x3f));
		*out += static_cast<unsigned char>(0x80 | ((codepoint >> 6) & 0x3f));
		*out += static_cast<unsigned char>(0x80 | (codepoint & 0x3f));
		return true;
	}

	// beyond the Unicode range
	return false;
}
77
78 334 bool is_valid_utf8(const char* inp, size_t inp_len) {
79
4/6
is_valid_utf8(char const*, unsigned long):
✗ Branch 13 → 3 not taken.
✗ Branch 13 → 14 not taken.
✓ Branch 22 → 3 taken 13346 times.
✓ Branch 22 → 23 taken 186 times.
is_valid_utf8(char const*, unsigned long long):
✓ Branch 13 → 3 taken 13029 times.
✓ Branch 13 → 14 taken 146 times.
26707 for(size_t i = 0; i < inp_len;) {
80 // optimization: if next 8 bytes are ascii, skip them
81
4/5
is_valid_utf8(char const*, unsigned long):
✓ Branch 3 → 4 taken 12668 times.
✗ Branch 3 → 7 not taken.
✓ Branch 3 → 13 taken 678 times.
is_valid_utf8(char const*, unsigned long long):
✓ Branch 3 → 4 taken 12501 times.
✓ Branch 3 → 7 taken 528 times.
26375 if(i + 8 <= inp_len) {
82 12668 uint64_t buf;
83 25169 memcpy(&buf, inp+i, sizeof(buf));
84
4/6
is_valid_utf8(char const*, unsigned long):
✗ Branch 4 → 5 not taken.
✗ Branch 4 → 6 not taken.
✓ Branch 7 → 8 taken 12531 times.
✓ Branch 7 → 9 taken 137 times.
is_valid_utf8(char const*, unsigned long long):
✓ Branch 4 → 5 taken 12364 times.
✓ Branch 4 → 6 taken 137 times.
25169 if((buf & 0x8080808080808080ull) == 0) {
85 24895 i += 8; continue;
86 }
87 }
88
89 815 int codepoint;
90
1/2
✓ Branch 15 → 16 taken 810 times.
✗ Branch 15 → 25 not taken.
1480 i += utf8_val(&codepoint, inp+i);
91
92
4/6
is_valid_utf8(char const*, unsigned long):
✗ Branch 8 → 9 not taken.
✗ Branch 8 → 10 not taken.
✓ Branch 16 → 17 taken 1 time.
✓ Branch 16 → 18 taken 814 times.
is_valid_utf8(char const*, unsigned long long):
✓ Branch 8 → 9 taken 1 time.
✓ Branch 8 → 10 taken 664 times.
1480 if (codepoint == -1) return false;
93 }
94
95 332 return true;
96 }
97
// Decodes a single codepoint from a UTF-16 word sequence starting at inp.
//
// On success, stores the codepoint in *codepoint and returns the number of
// words consumed (1 for a standalone word, 2 for a surrogate pair). On an
// unpaired or misordered surrogate, stores -1 in *codepoint and returns 0.
//
// The second word is only read when the first one is a high surrogate.
size_t utf16_val(int* codepoint, const wchar_t* inp)
{
	const wchar_t first_word = inp[0];

	// Anything outside the surrogate block 0xD800..0xDFFF (both bounds
	// inclusive) stands for itself.
	if (first_word < 0xD800 || first_word > 0xDFFF)
	{
		*codepoint = first_word;
		return 1u;
	}

	// High surrogate (0xD800..0xDBFF): must be followed by a low surrogate.
	if (first_word <= 0xDBFF)
	{
		const wchar_t second_word = inp[1];

		if (second_word >= 0xDC00 && second_word <= 0xDFFF)
		{
			// 10 bits from each surrogate, offset by 0x10000
			*codepoint = 0x10000
				+ ((int)(first_word - 0xD800) << 10u)
				+ ((int)(second_word - 0xDC00));
			return 2u;
		}
	}

	// Everything not covered above (lone low surrogate, or high surrogate
	// without a matching low surrogate) is considered invalid.
	*codepoint = -1;
	return 0u;
}
126
// Encodes a single Unicode codepoint as UTF-16.
//
// On success, *out is replaced with one word (BMP codepoints) or a surrogate
// pair (U+10000..U+10FFFF) and true is returned. Returns false and leaves
// *out untouched for values that have no UTF-16 encoding: the surrogate block
// U+D800..U+DFFF and anything above U+10FFFF.
bool codepoint_to_utf16(std::wstring* out, unsigned int codepoint)
{
	if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
	{
		// Supplementary plane: split the 20-bit offset into two 10-bit halves.
		const unsigned int offset = codepoint - 0x10000;
		const wchar_t high = (wchar_t)((offset >> 10) + 0xD800);
		const wchar_t low = (wchar_t)((offset & 0x3FF) + 0xDC00);

		*out = std::wstring() + high + low;
		return true;
	}

	// Basic multilingual plane, excluding the surrogate block (both bounds
	// inclusive). The upper limit keeps values above 0x10FFFF from being
	// truncated into a single word.
	if (codepoint < 0xD800 || (codepoint > 0xDFFF && codepoint < 0x10000))
	{
		*out = std::wstring() + (wchar_t)codepoint;
		return true;
	}

	// Everything not covered above should be considered invalid.
	return false;
}
146
147
148 bool utf16_to_utf8(string* result, const wchar_t* u16_str)
149 {
150 *result = "";
151
152 int codepoint;
153 do
154 {
155 u16_str += utf16_val(&codepoint, u16_str);
156
157 string next;
158 if (codepoint == -1 || !codepoint_to_utf8(&next, codepoint)) return false;
159
160 *result += next;
161 } while (codepoint != 0);
162
163 return true;
164 }
165
166 5425 bool utf8_to_utf16(std::wstring* result, const char* u8_str)
167 {
168
1/4
✓ Branch 2 → 3 taken 5425 times.
✗ Branch 2 → 27 not taken.
✗ Branch 3 → 4 not taken.
✗ Branch 3 → 35 not taken.
5425 *result = L"";
169
170 int codepoint;
171 do
172 {
173
0/2
✗ Branch 5 → 6 not taken.
✗ Branch 5 → 33 not taken.
277101 u8_str += utf8_val(&codepoint, u8_str);
174
175 277101 std::wstring next;
176
4/14
✓ Branch 6 → 7 taken 277101 times.
✗ Branch 6 → 9 not taken.
✓ Branch 7 → 8 taken 277101 times.
✗ Branch 7 → 24 not taken.
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 277101 times.
✗ Branch 9 → 10 not taken.
✗ Branch 9 → 12 not taken.
✗ Branch 10 → 11 not taken.
✗ Branch 10 → 31 not taken.
✗ Branch 11 → 12 not taken.
✓ Branch 11 → 13 taken 277101 times.
✗ Branch 14 → 15 not taken.
✗ Branch 14 → 16 not taken.
277101 if (codepoint == -1 || !codepoint_to_utf16(&next, codepoint)) return false;
177
178
1/4
✓ Branch 13 → 14 taken 277101 times.
✗ Branch 13 → 24 not taken.
✗ Branch 17 → 18 not taken.
✗ Branch 17 → 31 not taken.
277101 *result += next;
179
3/8
✓ Branch 16 → 17 taken 277101 times.
✗ Branch 16 → 20 not taken.
✓ Branch 18 → 19 taken 271676 times.
✓ Branch 18 → 21 taken 5425 times.
✗ Branch 20 → 21 not taken.
✗ Branch 20 → 22 not taken.
✗ Branch 26 → 27 not taken.
✗ Branch 26 → 28 not taken.
554202 } while (codepoint != 0);
180
181 5425 return true;
182 }
183