/*
 * Copyright Neil Brown ©2017-2023 <neil@brown.name>
 * May be distributed under terms of GPLv2 - see file:COPYING
 *
 * Filter a view on a document to convert 8-bit chars in various
 * charsets to the relevant unicode characters.
 *
 * Includes tables transformed from:
 *   https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1251.txt
 *   https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
 *   https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
 *   https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
 *   https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT
 */
19 #define PANE_DATA_PTR_TYPE const wchar_t *
21 #include "core-pane.h"
/*
 * Windows-1251 (Cyrillic) byte value -> Unicode code point.
 *
 * Indexed directly by the 8-bit character; every index 0x00-0xff has an
 * explicit entry, so the array holds exactly 256 elements.
 *   0x00-0x7f  map to themselves.
 *   0x80-0xbf  mix of Cyrillic letters, punctuation and symbols.
 *   0xc0-0xff  map to the contiguous run U+0410..U+044F.
 */
static const wchar_t WIN1251_UNICODE_TABLE[] = {
	[0x00] = 0x0000, // Null
	[0x01] = 0x0001, // Start Of Heading
	[0x02] = 0x0002, // Start Of Text
	[0x03] = 0x0003, // End Of Text
	[0x04] = 0x0004, // End Of Transmission
	[0x05] = 0x0005, // Enquiry
	[0x06] = 0x0006, // Acknowledge
	[0x07] = 0x0007, // Bell
	[0x08] = 0x0008, // Backspace
	[0x09] = 0x0009, // Horizontal Tabulation
	[0x0a] = 0x000a, // Line Feed
	[0x0b] = 0x000b, // Vertical Tabulation
	[0x0c] = 0x000c, // Form Feed
	[0x0d] = 0x000d, // Carriage Return
	[0x0e] = 0x000e, // Shift Out
	[0x0f] = 0x000f, // Shift In
	[0x10] = 0x0010, // Data Link Escape
	[0x11] = 0x0011, // Device Control One
	[0x12] = 0x0012, // Device Control Two
	[0x13] = 0x0013, // Device Control Three
	[0x14] = 0x0014, // Device Control Four
	[0x15] = 0x0015, // Negative Acknowledge
	[0x16] = 0x0016, // Synchronous Idle
	[0x17] = 0x0017, // End Of Transmission Block
	[0x18] = 0x0018, // Cancel
	[0x19] = 0x0019, // End Of Medium
	[0x1a] = 0x001a, // Substitute
	[0x1b] = 0x001b, // Escape
	[0x1c] = 0x001c, // File Separator
	[0x1d] = 0x001d, // Group Separator
	[0x1e] = 0x001e, // Record Separator
	[0x1f] = 0x001f, // Unit Separator
	[0x20] = 0x0020, // Space
	[0x21] = 0x0021, // Exclamation Mark
	[0x22] = 0x0022, // Quotation Mark
	[0x23] = 0x0023, // Number Sign
	[0x24] = 0x0024, // Dollar Sign
	[0x25] = 0x0025, // Percent Sign
	[0x26] = 0x0026, // Ampersand
	[0x27] = 0x0027, // Apostrophe
	[0x28] = 0x0028, // Left Parenthesis
	[0x29] = 0x0029, // Right Parenthesis
	[0x2a] = 0x002a, // Asterisk
	[0x2b] = 0x002b, // Plus Sign
	[0x2c] = 0x002c, // Comma
	[0x2d] = 0x002d, // Hyphen-Minus
	[0x2e] = 0x002e, // Full Stop
	[0x2f] = 0x002f, // Solidus
	[0x30] = 0x0030, // Digit Zero
	[0x31] = 0x0031, // Digit One
	[0x32] = 0x0032, // Digit Two
	[0x33] = 0x0033, // Digit Three
	[0x34] = 0x0034, // Digit Four
	[0x35] = 0x0035, // Digit Five
	[0x36] = 0x0036, // Digit Six
	[0x37] = 0x0037, // Digit Seven
	[0x38] = 0x0038, // Digit Eight
	[0x39] = 0x0039, // Digit Nine
	[0x3a] = 0x003a, // Colon
	[0x3b] = 0x003b, // Semicolon
	[0x3c] = 0x003c, // Less-Than Sign
	[0x3d] = 0x003d, // Equals Sign
	[0x3e] = 0x003e, // Greater-Than Sign
	[0x3f] = 0x003f, // Question Mark
	[0x40] = 0x0040, // Commercial At
	[0x41] = 0x0041, // Latin Capital Letter A
	[0x42] = 0x0042, // Latin Capital Letter B
	[0x43] = 0x0043, // Latin Capital Letter C
	[0x44] = 0x0044, // Latin Capital Letter D
	[0x45] = 0x0045, // Latin Capital Letter E
	[0x46] = 0x0046, // Latin Capital Letter F
	[0x47] = 0x0047, // Latin Capital Letter G
	[0x48] = 0x0048, // Latin Capital Letter H
	[0x49] = 0x0049, // Latin Capital Letter I
	[0x4a] = 0x004a, // Latin Capital Letter J
	[0x4b] = 0x004b, // Latin Capital Letter K
	[0x4c] = 0x004c, // Latin Capital Letter L
	[0x4d] = 0x004d, // Latin Capital Letter M
	[0x4e] = 0x004e, // Latin Capital Letter N
	[0x4f] = 0x004f, // Latin Capital Letter O
	[0x50] = 0x0050, // Latin Capital Letter P
	[0x51] = 0x0051, // Latin Capital Letter Q
	[0x52] = 0x0052, // Latin Capital Letter R
	[0x53] = 0x0053, // Latin Capital Letter S
	[0x54] = 0x0054, // Latin Capital Letter T
	[0x55] = 0x0055, // Latin Capital Letter U
	[0x56] = 0x0056, // Latin Capital Letter V
	[0x57] = 0x0057, // Latin Capital Letter W
	[0x58] = 0x0058, // Latin Capital Letter X
	[0x59] = 0x0059, // Latin Capital Letter Y
	[0x5a] = 0x005a, // Latin Capital Letter Z
	[0x5b] = 0x005b, // Left Square Bracket
	[0x5c] = 0x005c, // Reverse Solidus
	[0x5d] = 0x005d, // Right Square Bracket
	[0x5e] = 0x005e, // Circumflex Accent
	[0x5f] = 0x005f, // Low Line
	[0x60] = 0x0060, // Grave Accent
	[0x61] = 0x0061, // Latin Small Letter A
	[0x62] = 0x0062, // Latin Small Letter B
	[0x63] = 0x0063, // Latin Small Letter C
	[0x64] = 0x0064, // Latin Small Letter D
	[0x65] = 0x0065, // Latin Small Letter E
	[0x66] = 0x0066, // Latin Small Letter F
	[0x67] = 0x0067, // Latin Small Letter G
	[0x68] = 0x0068, // Latin Small Letter H
	[0x69] = 0x0069, // Latin Small Letter I
	[0x6a] = 0x006a, // Latin Small Letter J
	[0x6b] = 0x006b, // Latin Small Letter K
	[0x6c] = 0x006c, // Latin Small Letter L
	[0x6d] = 0x006d, // Latin Small Letter M
	[0x6e] = 0x006e, // Latin Small Letter N
	[0x6f] = 0x006f, // Latin Small Letter O
	[0x70] = 0x0070, // Latin Small Letter P
	[0x71] = 0x0071, // Latin Small Letter Q
	[0x72] = 0x0072, // Latin Small Letter R
	[0x73] = 0x0073, // Latin Small Letter S
	[0x74] = 0x0074, // Latin Small Letter T
	[0x75] = 0x0075, // Latin Small Letter U
	[0x76] = 0x0076, // Latin Small Letter V
	[0x77] = 0x0077, // Latin Small Letter W
	[0x78] = 0x0078, // Latin Small Letter X
	[0x79] = 0x0079, // Latin Small Letter Y
	[0x7a] = 0x007a, // Latin Small Letter Z
	[0x7b] = 0x007b, // Left Curly Bracket
	[0x7c] = 0x007c, // Vertical Line
	[0x7d] = 0x007d, // Right Curly Bracket
	[0x7e] = 0x007e, // Tilde
	[0x7f] = 0x007f, // Delete
	[0x80] = 0x0402, // Cyrillic Capital Letter Dje
	[0x81] = 0x0403, // Cyrillic Capital Letter Gje
	[0x82] = 0x201a, // Single Low-9 Quotation Mark
	[0x83] = 0x0453, // Cyrillic Small Letter Gje
	[0x84] = 0x201e, // Double Low-9 Quotation Mark
	[0x85] = 0x2026, // Horizontal Ellipsis
	[0x86] = 0x2020, // Dagger
	[0x87] = 0x2021, // Double Dagger
	[0x88] = 0x20ac, // Euro Sign
	[0x89] = 0x2030, // Per Mille Sign
	[0x8a] = 0x0409, // Cyrillic Capital Letter Lje
	[0x8b] = 0x2039, // Single Left-Pointing Angle Quotation Mark
	[0x8c] = 0x040a, // Cyrillic Capital Letter Nje
	[0x8d] = 0x040c, // Cyrillic Capital Letter Kje
	[0x8e] = 0x040b, // Cyrillic Capital Letter Tshe
	[0x8f] = 0x040f, // Cyrillic Capital Letter Dzhe
	[0x90] = 0x0452, // Cyrillic Small Letter Dje
	[0x91] = 0x2018, // Left Single Quotation Mark
	[0x92] = 0x2019, // Right Single Quotation Mark
	[0x93] = 0x201c, // Left Double Quotation Mark
	[0x94] = 0x201d, // Right Double Quotation Mark
	[0x95] = 0x2022, // Bullet
	[0x96] = 0x2013, // En Dash
	[0x97] = 0x2014, // Em Dash
	[0x98] = 0x0098, // No character assigned in Windows-1251; passed through as U+0098
	[0x99] = 0x2122, // Trade Mark Sign
	[0x9a] = 0x0459, // Cyrillic Small Letter Lje
	[0x9b] = 0x203a, // Single Right-Pointing Angle Quotation Mark
	[0x9c] = 0x045a, // Cyrillic Small Letter Nje
	[0x9d] = 0x045c, // Cyrillic Small Letter Kje
	[0x9e] = 0x045b, // Cyrillic Small Letter Tshe
	[0x9f] = 0x045f, // Cyrillic Small Letter Dzhe
	[0xa0] = 0x00a0, // No-Break Space
	[0xa1] = 0x040e, // Cyrillic Capital Letter Short U
	[0xa2] = 0x045e, // Cyrillic Small Letter Short U
	[0xa3] = 0x0408, // Cyrillic Capital Letter Je
	[0xa4] = 0x00a4, // Currency Sign
	[0xa5] = 0x0490, // Cyrillic Capital Letter Ghe With Upturn
	[0xa6] = 0x00a6, // Broken Bar
	[0xa7] = 0x00a7, // Section Sign
	[0xa8] = 0x0401, // Cyrillic Capital Letter Io
	[0xa9] = 0x00a9, // Copyright Sign
	[0xaa] = 0x0404, // Cyrillic Capital Letter Ukrainian Ie
	[0xab] = 0x00ab, // Left-Pointing Double Angle Quotation Mark
	[0xac] = 0x00ac, // Not Sign
	[0xad] = 0x00ad, // Soft Hyphen
	[0xae] = 0x00ae, // Registered Sign
	[0xaf] = 0x0407, // Cyrillic Capital Letter Yi
	[0xb0] = 0x00b0, // Degree Sign
	[0xb1] = 0x00b1, // Plus-Minus Sign
	[0xb2] = 0x0406, // Cyrillic Capital Letter Byelorussian-Ukrainian I
	[0xb3] = 0x0456, // Cyrillic Small Letter Byelorussian-Ukrainian I
	[0xb4] = 0x0491, // Cyrillic Small Letter Ghe With Upturn
	[0xb5] = 0x00b5, // Micro Sign
	[0xb6] = 0x00b6, // Pilcrow Sign
	[0xb7] = 0x00b7, // Middle Dot
	[0xb8] = 0x0451, // Cyrillic Small Letter Io
	[0xb9] = 0x2116, // Numero Sign
	[0xba] = 0x0454, // Cyrillic Small Letter Ukrainian Ie
	[0xbb] = 0x00bb, // Right-Pointing Double Angle Quotation Mark
	[0xbc] = 0x0458, // Cyrillic Small Letter Je
	[0xbd] = 0x0405, // Cyrillic Capital Letter Dze
	[0xbe] = 0x0455, // Cyrillic Small Letter Dze
	[0xbf] = 0x0457, // Cyrillic Small Letter Yi
	[0xc0] = 0x0410, // Cyrillic Capital Letter A
	[0xc1] = 0x0411, // Cyrillic Capital Letter Be
	[0xc2] = 0x0412, // Cyrillic Capital Letter Ve
	[0xc3] = 0x0413, // Cyrillic Capital Letter Ghe
	[0xc4] = 0x0414, // Cyrillic Capital Letter De
	[0xc5] = 0x0415, // Cyrillic Capital Letter Ie
	[0xc6] = 0x0416, // Cyrillic Capital Letter Zhe
	[0xc7] = 0x0417, // Cyrillic Capital Letter Ze
	[0xc8] = 0x0418, // Cyrillic Capital Letter I
	[0xc9] = 0x0419, // Cyrillic Capital Letter Short I
	[0xca] = 0x041a, // Cyrillic Capital Letter Ka
	[0xcb] = 0x041b, // Cyrillic Capital Letter El
	[0xcc] = 0x041c, // Cyrillic Capital Letter Em
	[0xcd] = 0x041d, // Cyrillic Capital Letter En
	[0xce] = 0x041e, // Cyrillic Capital Letter O
	[0xcf] = 0x041f, // Cyrillic Capital Letter Pe
	[0xd0] = 0x0420, // Cyrillic Capital Letter Er
	[0xd1] = 0x0421, // Cyrillic Capital Letter Es
	[0xd2] = 0x0422, // Cyrillic Capital Letter Te
	[0xd3] = 0x0423, // Cyrillic Capital Letter U
	[0xd4] = 0x0424, // Cyrillic Capital Letter Ef
	[0xd5] = 0x0425, // Cyrillic Capital Letter Ha
	[0xd6] = 0x0426, // Cyrillic Capital Letter Tse
	[0xd7] = 0x0427, // Cyrillic Capital Letter Che
	[0xd8] = 0x0428, // Cyrillic Capital Letter Sha
	[0xd9] = 0x0429, // Cyrillic Capital Letter Shcha
	[0xda] = 0x042a, // Cyrillic Capital Letter Hard Sign
	[0xdb] = 0x042b, // Cyrillic Capital Letter Yeru
	[0xdc] = 0x042c, // Cyrillic Capital Letter Soft Sign
	[0xdd] = 0x042d, // Cyrillic Capital Letter E
	[0xde] = 0x042e, // Cyrillic Capital Letter Yu
	[0xdf] = 0x042f, // Cyrillic Capital Letter Ya
	[0xe0] = 0x0430, // Cyrillic Small Letter A
	[0xe1] = 0x0431, // Cyrillic Small Letter Be
	[0xe2] = 0x0432, // Cyrillic Small Letter Ve
	[0xe3] = 0x0433, // Cyrillic Small Letter Ghe
	[0xe4] = 0x0434, // Cyrillic Small Letter De
	[0xe5] = 0x0435, // Cyrillic Small Letter Ie
	[0xe6] = 0x0436, // Cyrillic Small Letter Zhe
	[0xe7] = 0x0437, // Cyrillic Small Letter Ze
	[0xe8] = 0x0438, // Cyrillic Small Letter I
	[0xe9] = 0x0439, // Cyrillic Small Letter Short I
	[0xea] = 0x043a, // Cyrillic Small Letter Ka
	[0xeb] = 0x043b, // Cyrillic Small Letter El
	[0xec] = 0x043c, // Cyrillic Small Letter Em
	[0xed] = 0x043d, // Cyrillic Small Letter En
	[0xee] = 0x043e, // Cyrillic Small Letter O
	[0xef] = 0x043f, // Cyrillic Small Letter Pe
	[0xf0] = 0x0440, // Cyrillic Small Letter Er
	[0xf1] = 0x0441, // Cyrillic Small Letter Es
	[0xf2] = 0x0442, // Cyrillic Small Letter Te
	[0xf3] = 0x0443, // Cyrillic Small Letter U
	[0xf4] = 0x0444, // Cyrillic Small Letter Ef
	[0xf5] = 0x0445, // Cyrillic Small Letter Ha
	[0xf6] = 0x0446, // Cyrillic Small Letter Tse
	[0xf7] = 0x0447, // Cyrillic Small Letter Che
	[0xf8] = 0x0448, // Cyrillic Small Letter Sha
	[0xf9] = 0x0449, // Cyrillic Small Letter Shcha
	[0xfa] = 0x044a, // Cyrillic Small Letter Hard Sign
	[0xfb] = 0x044b, // Cyrillic Small Letter Yeru
	[0xfc] = 0x044c, // Cyrillic Small Letter Soft Sign
	[0xfd] = 0x044d, // Cyrillic Small Letter E
	[0xfe] = 0x044e, // Cyrillic Small Letter Yu
	[0xff] = 0x044f, // Cyrillic Small Letter Ya
};
/*
 * Windows-1252 (Western European) byte value -> Unicode code point.
 *
 * Indexed directly by the 8-bit character; every index 0x00-0xff has an
 * explicit entry, so the array holds exactly 256 elements.
 *   0x00-0x7f  map to themselves.
 *   0x80-0x9f  punctuation, symbols and a few extra Latin letters; the
 *              bytes with no assigned character (0x81, 0x8d, 0x8f, 0x90,
 *              0x9d) are passed through as the same-valued code point.
 *   0xa0-0xff  map to themselves.
 */
static const wchar_t WIN1252_UNICODE_TABLE[] = {
	[0x00] = 0x0000, // Null
	[0x01] = 0x0001, // Start Of Heading
	[0x02] = 0x0002, // Start Of Text
	[0x03] = 0x0003, // End Of Text
	[0x04] = 0x0004, // End Of Transmission
	[0x05] = 0x0005, // Enquiry
	[0x06] = 0x0006, // Acknowledge
	[0x07] = 0x0007, // Bell
	[0x08] = 0x0008, // Backspace
	[0x09] = 0x0009, // Horizontal Tabulation
	[0x0a] = 0x000a, // Line Feed
	[0x0b] = 0x000b, // Vertical Tabulation
	[0x0c] = 0x000c, // Form Feed
	[0x0d] = 0x000d, // Carriage Return
	[0x0e] = 0x000e, // Shift Out
	[0x0f] = 0x000f, // Shift In
	[0x10] = 0x0010, // Data Link Escape
	[0x11] = 0x0011, // Device Control One
	[0x12] = 0x0012, // Device Control Two
	[0x13] = 0x0013, // Device Control Three
	[0x14] = 0x0014, // Device Control Four
	[0x15] = 0x0015, // Negative Acknowledge
	[0x16] = 0x0016, // Synchronous Idle
	[0x17] = 0x0017, // End Of Transmission Block
	[0x18] = 0x0018, // Cancel
	[0x19] = 0x0019, // End Of Medium
	[0x1a] = 0x001a, // Substitute
	[0x1b] = 0x001b, // Escape
	[0x1c] = 0x001c, // File Separator
	[0x1d] = 0x001d, // Group Separator
	[0x1e] = 0x001e, // Record Separator
	[0x1f] = 0x001f, // Unit Separator
	[0x20] = 0x0020, // Space
	[0x21] = 0x0021, // Exclamation Mark
	[0x22] = 0x0022, // Quotation Mark
	[0x23] = 0x0023, // Number Sign
	[0x24] = 0x0024, // Dollar Sign
	[0x25] = 0x0025, // Percent Sign
	[0x26] = 0x0026, // Ampersand
	[0x27] = 0x0027, // Apostrophe
	[0x28] = 0x0028, // Left Parenthesis
	[0x29] = 0x0029, // Right Parenthesis
	[0x2a] = 0x002a, // Asterisk
	[0x2b] = 0x002b, // Plus Sign
	[0x2c] = 0x002c, // Comma
	[0x2d] = 0x002d, // Hyphen-Minus
	[0x2e] = 0x002e, // Full Stop
	[0x2f] = 0x002f, // Solidus
	[0x30] = 0x0030, // Digit Zero
	[0x31] = 0x0031, // Digit One
	[0x32] = 0x0032, // Digit Two
	[0x33] = 0x0033, // Digit Three
	[0x34] = 0x0034, // Digit Four
	[0x35] = 0x0035, // Digit Five
	[0x36] = 0x0036, // Digit Six
	[0x37] = 0x0037, // Digit Seven
	[0x38] = 0x0038, // Digit Eight
	[0x39] = 0x0039, // Digit Nine
	[0x3a] = 0x003a, // Colon
	[0x3b] = 0x003b, // Semicolon
	[0x3c] = 0x003c, // Less-Than Sign
	[0x3d] = 0x003d, // Equals Sign
	[0x3e] = 0x003e, // Greater-Than Sign
	[0x3f] = 0x003f, // Question Mark
	[0x40] = 0x0040, // Commercial At
	[0x41] = 0x0041, // Latin Capital Letter A
	[0x42] = 0x0042, // Latin Capital Letter B
	[0x43] = 0x0043, // Latin Capital Letter C
	[0x44] = 0x0044, // Latin Capital Letter D
	[0x45] = 0x0045, // Latin Capital Letter E
	[0x46] = 0x0046, // Latin Capital Letter F
	[0x47] = 0x0047, // Latin Capital Letter G
	[0x48] = 0x0048, // Latin Capital Letter H
	[0x49] = 0x0049, // Latin Capital Letter I
	[0x4a] = 0x004a, // Latin Capital Letter J
	[0x4b] = 0x004b, // Latin Capital Letter K
	[0x4c] = 0x004c, // Latin Capital Letter L
	[0x4d] = 0x004d, // Latin Capital Letter M
	[0x4e] = 0x004e, // Latin Capital Letter N
	[0x4f] = 0x004f, // Latin Capital Letter O
	[0x50] = 0x0050, // Latin Capital Letter P
	[0x51] = 0x0051, // Latin Capital Letter Q
	[0x52] = 0x0052, // Latin Capital Letter R
	[0x53] = 0x0053, // Latin Capital Letter S
	[0x54] = 0x0054, // Latin Capital Letter T
	[0x55] = 0x0055, // Latin Capital Letter U
	[0x56] = 0x0056, // Latin Capital Letter V
	[0x57] = 0x0057, // Latin Capital Letter W
	[0x58] = 0x0058, // Latin Capital Letter X
	[0x59] = 0x0059, // Latin Capital Letter Y
	[0x5a] = 0x005a, // Latin Capital Letter Z
	[0x5b] = 0x005b, // Left Square Bracket
	[0x5c] = 0x005c, // Reverse Solidus
	[0x5d] = 0x005d, // Right Square Bracket
	[0x5e] = 0x005e, // Circumflex Accent
	[0x5f] = 0x005f, // Low Line
	[0x60] = 0x0060, // Grave Accent
	[0x61] = 0x0061, // Latin Small Letter A
	[0x62] = 0x0062, // Latin Small Letter B
	[0x63] = 0x0063, // Latin Small Letter C
	[0x64] = 0x0064, // Latin Small Letter D
	[0x65] = 0x0065, // Latin Small Letter E
	[0x66] = 0x0066, // Latin Small Letter F
	[0x67] = 0x0067, // Latin Small Letter G
	[0x68] = 0x0068, // Latin Small Letter H
	[0x69] = 0x0069, // Latin Small Letter I
	[0x6a] = 0x006a, // Latin Small Letter J
	[0x6b] = 0x006b, // Latin Small Letter K
	[0x6c] = 0x006c, // Latin Small Letter L
	[0x6d] = 0x006d, // Latin Small Letter M
	[0x6e] = 0x006e, // Latin Small Letter N
	[0x6f] = 0x006f, // Latin Small Letter O
	[0x70] = 0x0070, // Latin Small Letter P
	[0x71] = 0x0071, // Latin Small Letter Q
	[0x72] = 0x0072, // Latin Small Letter R
	[0x73] = 0x0073, // Latin Small Letter S
	[0x74] = 0x0074, // Latin Small Letter T
	[0x75] = 0x0075, // Latin Small Letter U
	[0x76] = 0x0076, // Latin Small Letter V
	[0x77] = 0x0077, // Latin Small Letter W
	[0x78] = 0x0078, // Latin Small Letter X
	[0x79] = 0x0079, // Latin Small Letter Y
	[0x7a] = 0x007a, // Latin Small Letter Z
	[0x7b] = 0x007b, // Left Curly Bracket
	[0x7c] = 0x007c, // Vertical Line
	[0x7d] = 0x007d, // Right Curly Bracket
	[0x7e] = 0x007e, // Tilde
	[0x7f] = 0x007f, // Delete
	[0x80] = 0x20ac, // Euro Sign
	[0x81] = 0x0081, // No character assigned in Windows-1252; passed through as U+0081
	[0x82] = 0x201a, // Single Low-9 Quotation Mark
	[0x83] = 0x0192, // Latin Small Letter F With Hook
	[0x84] = 0x201e, // Double Low-9 Quotation Mark
	[0x85] = 0x2026, // Horizontal Ellipsis
	[0x86] = 0x2020, // Dagger
	[0x87] = 0x2021, // Double Dagger
	[0x88] = 0x02c6, // Modifier Letter Circumflex Accent
	[0x89] = 0x2030, // Per Mille Sign
	[0x8a] = 0x0160, // Latin Capital Letter S With Caron
	[0x8b] = 0x2039, // Single Left-Pointing Angle Quotation Mark
	[0x8c] = 0x0152, // Latin Capital Ligature Oe
	[0x8d] = 0x008d, // No character assigned in Windows-1252; passed through as U+008D
	[0x8e] = 0x017d, // Latin Capital Letter Z With Caron
	[0x8f] = 0x008f, // No character assigned in Windows-1252; passed through as U+008F
	[0x90] = 0x0090, // No character assigned in Windows-1252; passed through as U+0090
	[0x91] = 0x2018, // Left Single Quotation Mark
	[0x92] = 0x2019, // Right Single Quotation Mark
	[0x93] = 0x201c, // Left Double Quotation Mark
	[0x94] = 0x201d, // Right Double Quotation Mark
	[0x95] = 0x2022, // Bullet
	[0x96] = 0x2013, // En Dash
	[0x97] = 0x2014, // Em Dash
	[0x98] = 0x02dc, // Small Tilde
	[0x99] = 0x2122, // Trade Mark Sign
	[0x9a] = 0x0161, // Latin Small Letter S With Caron
	[0x9b] = 0x203a, // Single Right-Pointing Angle Quotation Mark
	[0x9c] = 0x0153, // Latin Small Ligature Oe
	[0x9d] = 0x009d, // No character assigned in Windows-1252; passed through as U+009D
	[0x9e] = 0x017e, // Latin Small Letter Z With Caron
	[0x9f] = 0x0178, // Latin Capital Letter Y With Diaeresis
	[0xa0] = 0x00a0, // No-Break Space
	[0xa1] = 0x00a1, // Inverted Exclamation Mark
	[0xa2] = 0x00a2, // Cent Sign
	[0xa3] = 0x00a3, // Pound Sign
	[0xa4] = 0x00a4, // Currency Sign
	[0xa5] = 0x00a5, // Yen Sign
	[0xa6] = 0x00a6, // Broken Bar
	[0xa7] = 0x00a7, // Section Sign
	[0xa8] = 0x00a8, // Diaeresis
	[0xa9] = 0x00a9, // Copyright Sign
	[0xaa] = 0x00aa, // Feminine Ordinal Indicator
	[0xab] = 0x00ab, // Left-Pointing Double Angle Quotation Mark
	[0xac] = 0x00ac, // Not Sign
	[0xad] = 0x00ad, // Soft Hyphen
	[0xae] = 0x00ae, // Registered Sign
	[0xaf] = 0x00af, // Macron
	[0xb0] = 0x00b0, // Degree Sign
	[0xb1] = 0x00b1, // Plus-Minus Sign
	[0xb2] = 0x00b2, // Superscript Two
	[0xb3] = 0x00b3, // Superscript Three
	[0xb4] = 0x00b4, // Acute Accent
	[0xb5] = 0x00b5, // Micro Sign
	[0xb6] = 0x00b6, // Pilcrow Sign
	[0xb7] = 0x00b7, // Middle Dot
	[0xb8] = 0x00b8, // Cedilla
	[0xb9] = 0x00b9, // Superscript One
	[0xba] = 0x00ba, // Masculine Ordinal Indicator
	[0xbb] = 0x00bb, // Right-Pointing Double Angle Quotation Mark
	[0xbc] = 0x00bc, // Vulgar Fraction One Quarter
	[0xbd] = 0x00bd, // Vulgar Fraction One Half
	[0xbe] = 0x00be, // Vulgar Fraction Three Quarters
	[0xbf] = 0x00bf, // Inverted Question Mark
	[0xc0] = 0x00c0, // Latin Capital Letter A With Grave
	[0xc1] = 0x00c1, // Latin Capital Letter A With Acute
	[0xc2] = 0x00c2, // Latin Capital Letter A With Circumflex
	[0xc3] = 0x00c3, // Latin Capital Letter A With Tilde
	[0xc4] = 0x00c4, // Latin Capital Letter A With Diaeresis
	[0xc5] = 0x00c5, // Latin Capital Letter A With Ring Above
	[0xc6] = 0x00c6, // Latin Capital Ligature Ae
	[0xc7] = 0x00c7, // Latin Capital Letter C With Cedilla
	[0xc8] = 0x00c8, // Latin Capital Letter E With Grave
	[0xc9] = 0x00c9, // Latin Capital Letter E With Acute
	[0xca] = 0x00ca, // Latin Capital Letter E With Circumflex
	[0xcb] = 0x00cb, // Latin Capital Letter E With Diaeresis
	[0xcc] = 0x00cc, // Latin Capital Letter I With Grave
	[0xcd] = 0x00cd, // Latin Capital Letter I With Acute
	[0xce] = 0x00ce, // Latin Capital Letter I With Circumflex
	[0xcf] = 0x00cf, // Latin Capital Letter I With Diaeresis
	[0xd0] = 0x00d0, // Latin Capital Letter Eth
	[0xd1] = 0x00d1, // Latin Capital Letter N With Tilde
	[0xd2] = 0x00d2, // Latin Capital Letter O With Grave
	[0xd3] = 0x00d3, // Latin Capital Letter O With Acute
	[0xd4] = 0x00d4, // Latin Capital Letter O With Circumflex
	[0xd5] = 0x00d5, // Latin Capital Letter O With Tilde
	[0xd6] = 0x00d6, // Latin Capital Letter O With Diaeresis
	[0xd7] = 0x00d7, // Multiplication Sign
	[0xd8] = 0x00d8, // Latin Capital Letter O With Stroke
	[0xd9] = 0x00d9, // Latin Capital Letter U With Grave
	[0xda] = 0x00da, // Latin Capital Letter U With Acute
	[0xdb] = 0x00db, // Latin Capital Letter U With Circumflex
	[0xdc] = 0x00dc, // Latin Capital Letter U With Diaeresis
	[0xdd] = 0x00dd, // Latin Capital Letter Y With Acute
	[0xde] = 0x00de, // Latin Capital Letter Thorn
	[0xdf] = 0x00df, // Latin Small Letter Sharp S
	[0xe0] = 0x00e0, // Latin Small Letter A With Grave
	[0xe1] = 0x00e1, // Latin Small Letter A With Acute
	[0xe2] = 0x00e2, // Latin Small Letter A With Circumflex
	[0xe3] = 0x00e3, // Latin Small Letter A With Tilde
	[0xe4] = 0x00e4, // Latin Small Letter A With Diaeresis
	[0xe5] = 0x00e5, // Latin Small Letter A With Ring Above
	[0xe6] = 0x00e6, // Latin Small Ligature Ae
	[0xe7] = 0x00e7, // Latin Small Letter C With Cedilla
	[0xe8] = 0x00e8, // Latin Small Letter E With Grave
	[0xe9] = 0x00e9, // Latin Small Letter E With Acute
	[0xea] = 0x00ea, // Latin Small Letter E With Circumflex
	[0xeb] = 0x00eb, // Latin Small Letter E With Diaeresis
	[0xec] = 0x00ec, // Latin Small Letter I With Grave
	[0xed] = 0x00ed, // Latin Small Letter I With Acute
	[0xee] = 0x00ee, // Latin Small Letter I With Circumflex
	[0xef] = 0x00ef, // Latin Small Letter I With Diaeresis
	[0xf0] = 0x00f0, // Latin Small Letter Eth
	[0xf1] = 0x00f1, // Latin Small Letter N With Tilde
	[0xf2] = 0x00f2, // Latin Small Letter O With Grave
	[0xf3] = 0x00f3, // Latin Small Letter O With Acute
	[0xf4] = 0x00f4, // Latin Small Letter O With Circumflex
	[0xf5] = 0x00f5, // Latin Small Letter O With Tilde
	[0xf6] = 0x00f6, // Latin Small Letter O With Diaeresis
	[0xf7] = 0x00f7, // Division Sign
	[0xf8] = 0x00f8, // Latin Small Letter O With Stroke
	[0xf9] = 0x00f9, // Latin Small Letter U With Grave
	[0xfa] = 0x00fa, // Latin Small Letter U With Acute
	[0xfb] = 0x00fb, // Latin Small Letter U With Circumflex
	[0xfc] = 0x00fc, // Latin Small Letter U With Diaeresis
	[0xfd] = 0x00fd, // Latin Small Letter Y With Acute
	[0xfe] = 0x00fe, // Latin Small Letter Thorn
	[0xff] = 0x00ff, // Latin Small Letter Y With Diaeresis
};
541 static const wchar_t ISO_8859_1_UNICODE_TABLE[] = {
542 [0x00] = 0x0000, // NULL
543 [0x01] = 0x0001, // START OF HEADING
544 [0x02] = 0x0002, // START OF TEXT
545 [0x03] = 0x0003, // END OF TEXT
546 [0x04] = 0x0004, // END OF TRANSMISSION
547 [0x05] = 0x0005, // ENQUIRY
548 [0x06] = 0x0006, // ACKNOWLEDGE
549 [0x07] = 0x0007, // BELL
550 [0x08] = 0x0008, // BACKSPACE
551 [0x09] = 0x0009, // HORIZONTAL TABULATION
552 [0x0A] = 0x000A, // LINE FEED
553 [0x0B] = 0x000B, // VERTICAL TABULATION
554 [0x0C] = 0x000C, // FORM FEED
555 [0x0D] = 0x000D, // CARRIAGE RETURN
556 [0x0E] = 0x000E, // SHIFT OUT
557 [0x0F] = 0x000F, // SHIFT IN
558 [0x10] = 0x0010, // DATA LINK ESCAPE
559 [0x11] = 0x0011, // DEVICE CONTROL ONE
560 [0x12] = 0x0012, // DEVICE CONTROL TWO
561 [0x13] = 0x0013, // DEVICE CONTROL THREE
562 [0x14] = 0x0014, // DEVICE CONTROL FOUR
563 [0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
564 [0x16] = 0x0016, // SYNCHRONOUS IDLE
565 [0x17] = 0x0017, // END OF TRANSMISSION BLOCK
566 [0x18] = 0x0018, // CANCEL
567 [0x19] = 0x0019, // END OF MEDIUM
568 [0x1A] = 0x001A, // SUBSTITUTE
569 [0x1B] = 0x001B, // ESCAPE
570 [0x1C] = 0x001C, // FILE SEPARATOR
571 [0x1D] = 0x001D, // GROUP SEPARATOR
572 [0x1E] = 0x001E, // RECORD SEPARATOR
573 [0x1F] = 0x001F, // UNIT SEPARATOR
574 [0x20] = 0x0020, // SPACE
575 [0x21] = 0x0021, // EXCLAMATION MARK
576 [0x22] = 0x0022, // QUOTATION MARK
577 [0x23] = 0x0023, // NUMBER SIGN
578 [0x24] = 0x0024, // DOLLAR SIGN
579 [0x25] = 0x0025, // PERCENT SIGN
580 [0x26] = 0x0026, // AMPERSAND
581 [0x27] = 0x0027, // APOSTROPHE
582 [0x28] = 0x0028, // LEFT PARENTHESIS
583 [0x29] = 0x0029, // RIGHT PARENTHESIS
584 [0x2A] = 0x002A, // ASTERISK
585 [0x2B] = 0x002B, // PLUS SIGN
586 [0x2C] = 0x002C, // COMMA
587 [0x2D] = 0x002D, // HYPHEN-MINUS
588 [0x2E] = 0x002E, // FULL STOP
589 [0x2F] = 0x002F, // SOLIDUS
590 [0x30] = 0x0030, // DIGIT ZERO
591 [0x31] = 0x0031, // DIGIT ONE
592 [0x32] = 0x0032, // DIGIT TWO
593 [0x33] = 0x0033, // DIGIT THREE
594 [0x34] = 0x0034, // DIGIT FOUR
595 [0x35] = 0x0035, // DIGIT FIVE
596 [0x36] = 0x0036, // DIGIT SIX
597 [0x37] = 0x0037, // DIGIT SEVEN
598 [0x38] = 0x0038, // DIGIT EIGHT
599 [0x39] = 0x0039, // DIGIT NINE
600 [0x3A] = 0x003A, // COLON
601 [0x3B] = 0x003B, // SEMICOLON
602 [0x3C] = 0x003C, // LESS-THAN SIGN
603 [0x3D] = 0x003D, // EQUALS SIGN
604 [0x3E] = 0x003E, // GREATER-THAN SIGN
605 [0x3F] = 0x003F, // QUESTION MARK
606 [0x40] = 0x0040, // COMMERCIAL AT
607 [0x41] = 0x0041, // LATIN CAPITAL LETTER A
608 [0x42] = 0x0042, // LATIN CAPITAL LETTER B
609 [0x43] = 0x0043, // LATIN CAPITAL LETTER C
610 [0x44] = 0x0044, // LATIN CAPITAL LETTER D
611 [0x45] = 0x0045, // LATIN CAPITAL LETTER E
612 [0x46] = 0x0046, // LATIN CAPITAL LETTER F
613 [0x47] = 0x0047, // LATIN CAPITAL LETTER G
614 [0x48] = 0x0048, // LATIN CAPITAL LETTER H
615 [0x49] = 0x0049, // LATIN CAPITAL LETTER I
616 [0x4A] = 0x004A, // LATIN CAPITAL LETTER J
617 [0x4B] = 0x004B, // LATIN CAPITAL LETTER K
618 [0x4C] = 0x004C, // LATIN CAPITAL LETTER L
619 [0x4D] = 0x004D, // LATIN CAPITAL LETTER M
620 [0x4E] = 0x004E, // LATIN CAPITAL LETTER N
621 [0x4F] = 0x004F, // LATIN CAPITAL LETTER O
622 [0x50] = 0x0050, // LATIN CAPITAL LETTER P
623 [0x51] = 0x0051, // LATIN CAPITAL LETTER Q
624 [0x52] = 0x0052, // LATIN CAPITAL LETTER R
625 [0x53] = 0x0053, // LATIN CAPITAL LETTER S
626 [0x54] = 0x0054, // LATIN CAPITAL LETTER T
627 [0x55] = 0x0055, // LATIN CAPITAL LETTER U
628 [0x56] = 0x0056, // LATIN CAPITAL LETTER V
629 [0x57] = 0x0057, // LATIN CAPITAL LETTER W
630 [0x58] = 0x0058, // LATIN CAPITAL LETTER X
631 [0x59] = 0x0059, // LATIN CAPITAL LETTER Y
632 [0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
633 [0x5B] = 0x005B, // LEFT SQUARE BRACKET
634 [0x5C] = 0x005C, // REVERSE SOLIDUS
635 [0x5D] = 0x005D, // RIGHT SQUARE BRACKET
636 [0x5E] = 0x005E, // CIRCUMFLEX ACCENT
637 [0x5F] = 0x005F, // LOW LINE
638 [0x60] = 0x0060, // GRAVE ACCENT
639 [0x61] = 0x0061, // LATIN SMALL LETTER A
640 [0x62] = 0x0062, // LATIN SMALL LETTER B
641 [0x63] = 0x0063, // LATIN SMALL LETTER C
642 [0x64] = 0x0064, // LATIN SMALL LETTER D
643 [0x65] = 0x0065, // LATIN SMALL LETTER E
644 [0x66] = 0x0066, // LATIN SMALL LETTER F
645 [0x67] = 0x0067, // LATIN SMALL LETTER G
646 [0x68] = 0x0068, // LATIN SMALL LETTER H
647 [0x69] = 0x0069, // LATIN SMALL LETTER I
648 [0x6A] = 0x006A, // LATIN SMALL LETTER J
649 [0x6B] = 0x006B, // LATIN SMALL LETTER K
650 [0x6C] = 0x006C, // LATIN SMALL LETTER L
651 [0x6D] = 0x006D, // LATIN SMALL LETTER M
652 [0x6E] = 0x006E, // LATIN SMALL LETTER N
653 [0x6F] = 0x006F, // LATIN SMALL LETTER O
654 [0x70] = 0x0070, // LATIN SMALL LETTER P
655 [0x71] = 0x0071, // LATIN SMALL LETTER Q
656 [0x72] = 0x0072, // LATIN SMALL LETTER R
657 [0x73] = 0x0073, // LATIN SMALL LETTER S
658 [0x74] = 0x0074, // LATIN SMALL LETTER T
659 [0x75] = 0x0075, // LATIN SMALL LETTER U
660 [0x76] = 0x0076, // LATIN SMALL LETTER V
661 [0x77] = 0x0077, // LATIN SMALL LETTER W
662 [0x78] = 0x0078, // LATIN SMALL LETTER X
663 [0x79] = 0x0079, // LATIN SMALL LETTER Y
664 [0x7A] = 0x007A, // LATIN SMALL LETTER Z
665 [0x7B] = 0x007B, // LEFT CURLY BRACKET
666 [0x7C] = 0x007C, // VERTICAL LINE
667 [0x7D] = 0x007D, // RIGHT CURLY BRACKET
668 [0x7E] = 0x007E, // TILDE
669 [0x7F] = 0x007F, // DELETE
670 [0x80] = 0x0080, // <control>
671 [0x81] = 0x0081, // <control>
672 [0x82] = 0x0082, // <control>
673 [0x83] = 0x0083, // <control>
674 [0x84] = 0x0084, // <control>
675 [0x85] = 0x0085, // <control>
676 [0x86] = 0x0086, // <control>
677 [0x87] = 0x0087, // <control>
678 [0x88] = 0x0088, // <control>
679 [0x89] = 0x0089, // <control>
680 [0x8A] = 0x008A, // <control>
681 [0x8B] = 0x008B, // <control>
682 [0x8C] = 0x008C, // <control>
683 [0x8D] = 0x008D, // <control>
684 [0x8E] = 0x008E, // <control>
685 [0x8F] = 0x008F, // <control>
686 [0x90] = 0x0090, // <control>
687 [0x91] = 0x0091, // <control>
688 [0x92] = 0x0092, // <control>
689 [0x93] = 0x0093, // <control>
690 [0x94] = 0x0094, // <control>
691 [0x95] = 0x0095, // <control>
692 [0x96] = 0x0096, // <control>
693 [0x97] = 0x0097, // <control>
694 [0x98] = 0x0098, // <control>
695 [0x99] = 0x0099, // <control>
696 [0x9A] = 0x009A, // <control>
697 [0x9B] = 0x009B, // <control>
698 [0x9C] = 0x009C, // <control>
699 [0x9D] = 0x009D, // <control>
700 [0x9E] = 0x009E, // <control>
701 [0x9F] = 0x009F, // <control>
702 [0xA0] = 0x00A0, // NO-BREAK SPACE
703 [0xA1] = 0x00A1, // INVERTED EXCLAMATION MARK
704 [0xA2] = 0x00A2, // CENT SIGN
705 [0xA3] = 0x00A3, // POUND SIGN
706 [0xA4] = 0x00A4, // CURRENCY SIGN
707 [0xA5] = 0x00A5, // YEN SIGN
708 [0xA6] = 0x00A6, // BROKEN BAR
709 [0xA7] = 0x00A7, // SECTION SIGN
710 [0xA8] = 0x00A8, // DIAERESIS
711 [0xA9] = 0x00A9, // COPYRIGHT SIGN
712 [0xAA] = 0x00AA, // FEMININE ORDINAL INDICATOR
713 [0xAB] = 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
714 [0xAC] = 0x00AC, // NOT SIGN
715 [0xAD] = 0x00AD, // SOFT HYPHEN
716 [0xAE] = 0x00AE, // REGISTERED SIGN
717 [0xAF] = 0x00AF, // MACRON
718 [0xB0] = 0x00B0, // DEGREE SIGN
719 [0xB1] = 0x00B1, // PLUS-MINUS SIGN
720 [0xB2] = 0x00B2, // SUPERSCRIPT TWO
721 [0xB3] = 0x00B3, // SUPERSCRIPT THREE
722 [0xB4] = 0x00B4, // ACUTE ACCENT
723 [0xB5] = 0x00B5, // MICRO SIGN
724 [0xB6] = 0x00B6, // PILCROW SIGN
725 [0xB7] = 0x00B7, // MIDDLE DOT
726 [0xB8] = 0x00B8, // CEDILLA
727 [0xB9] = 0x00B9, // SUPERSCRIPT ONE
728 [0xBA] = 0x00BA, // MASCULINE ORDINAL INDICATOR
729 [0xBB] = 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
730 [0xBC] = 0x00BC, // VULGAR FRACTION ONE QUARTER
731 [0xBD] = 0x00BD, // VULGAR FRACTION ONE HALF
732 [0xBE] = 0x00BE, // VULGAR FRACTION THREE QUARTERS
733 [0xBF] = 0x00BF, // INVERTED QUESTION MARK
734 [0xC0] = 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
735 [0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
736 [0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
737 [0xC3] = 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
738 [0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
739 [0xC5] = 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
740 [0xC6] = 0x00C6, // LATIN CAPITAL LETTER AE
741 [0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
742 [0xC8] = 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
743 [0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
744 [0xCA] = 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
745 [0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
746 [0xCC] = 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
747 [0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
748 [0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
749 [0xCF] = 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
750 [0xD0] = 0x00D0, // LATIN CAPITAL LETTER ETH (Icelandic)
751 [0xD1] = 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
752 [0xD2] = 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
753 [0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
754 [0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
755 [0xD5] = 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
756 [0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
757 [0xD7] = 0x00D7, // MULTIPLICATION SIGN
758 [0xD8] = 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
759 [0xD9] = 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
760 [0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
761 [0xDB] = 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
762 [0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
763 [0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
764 [0xDE] = 0x00DE, // LATIN CAPITAL LETTER THORN (Icelandic)
765 [0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S (German)
766 [0xE0] = 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
767 [0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
768 [0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
769 [0xE3] = 0x00E3, // LATIN SMALL LETTER A WITH TILDE
770 [0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
771 [0xE5] = 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
772 [0xE6] = 0x00E6, // LATIN SMALL LETTER AE
773 [0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
774 [0xE8] = 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
775 [0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
776 [0xEA] = 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
777 [0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
778 [0xEC] = 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
779 [0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
780 [0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
781 [0xEF] = 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
782 [0xF0] = 0x00F0, // LATIN SMALL LETTER ETH (Icelandic)
783 [0xF1] = 0x00F1, // LATIN SMALL LETTER N WITH TILDE
784 [0xF2] = 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
785 [0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
786 [0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
787 [0xF5] = 0x00F5, // LATIN SMALL LETTER O WITH TILDE
788 [0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
789 [0xF7] = 0x00F7, // DIVISION SIGN
790 [0xF8] = 0x00F8, // LATIN SMALL LETTER O WITH STROKE
791 [0xF9] = 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
792 [0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
793 [0xFB] = 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
794 [0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
795 [0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
796 [0xFE] = 0x00FE, // LATIN SMALL LETTER THORN (Icelandic)
797 [0xFF] = 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
/*
 * ISO-8859-2 (Latin-2) byte -> Unicode code point table.
 *
 * Indexed by the raw 8-bit value.  Positions 0x00-0xA0 map to the code
 * point of the same value; the Latin-2 specific letters and spacing
 * accents live in 0xA1-0xFF.
 *
 * The bound is spelled out as 256 so that an index masked with 0xff is
 * always inside the array, and so that an entry accidentally added past
 * 0xFF is rejected at compile time instead of silently growing the table.
 */
static const wchar_t ISO_8859_2_UNICODE_TABLE[256] = {
	[0x00] = 0x0000, // NULL
	[0x01] = 0x0001, // START OF HEADING
	[0x02] = 0x0002, // START OF TEXT
	[0x03] = 0x0003, // END OF TEXT
	[0x04] = 0x0004, // END OF TRANSMISSION
	[0x05] = 0x0005, // ENQUIRY
	[0x06] = 0x0006, // ACKNOWLEDGE
	[0x07] = 0x0007, // BELL
	[0x08] = 0x0008, // BACKSPACE
	[0x09] = 0x0009, // HORIZONTAL TABULATION
	[0x0A] = 0x000A, // LINE FEED
	[0x0B] = 0x000B, // VERTICAL TABULATION
	[0x0C] = 0x000C, // FORM FEED
	[0x0D] = 0x000D, // CARRIAGE RETURN
	[0x0E] = 0x000E, // SHIFT OUT
	[0x0F] = 0x000F, // SHIFT IN
	[0x10] = 0x0010, // DATA LINK ESCAPE
	[0x11] = 0x0011, // DEVICE CONTROL ONE
	[0x12] = 0x0012, // DEVICE CONTROL TWO
	[0x13] = 0x0013, // DEVICE CONTROL THREE
	[0x14] = 0x0014, // DEVICE CONTROL FOUR
	[0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
	[0x16] = 0x0016, // SYNCHRONOUS IDLE
	[0x17] = 0x0017, // END OF TRANSMISSION BLOCK
	[0x18] = 0x0018, // CANCEL
	[0x19] = 0x0019, // END OF MEDIUM
	[0x1A] = 0x001A, // SUBSTITUTE
	[0x1B] = 0x001B, // ESCAPE
	[0x1C] = 0x001C, // FILE SEPARATOR
	[0x1D] = 0x001D, // GROUP SEPARATOR
	[0x1E] = 0x001E, // RECORD SEPARATOR
	[0x1F] = 0x001F, // UNIT SEPARATOR
	[0x20] = 0x0020, // SPACE
	[0x21] = 0x0021, // EXCLAMATION MARK
	[0x22] = 0x0022, // QUOTATION MARK
	[0x23] = 0x0023, // NUMBER SIGN
	[0x24] = 0x0024, // DOLLAR SIGN
	[0x25] = 0x0025, // PERCENT SIGN
	[0x26] = 0x0026, // AMPERSAND
	[0x27] = 0x0027, // APOSTROPHE
	[0x28] = 0x0028, // LEFT PARENTHESIS
	[0x29] = 0x0029, // RIGHT PARENTHESIS
	[0x2A] = 0x002A, // ASTERISK
	[0x2B] = 0x002B, // PLUS SIGN
	[0x2C] = 0x002C, // COMMA
	[0x2D] = 0x002D, // HYPHEN-MINUS
	[0x2E] = 0x002E, // FULL STOP
	[0x2F] = 0x002F, // SOLIDUS
	[0x30] = 0x0030, // DIGIT ZERO
	[0x31] = 0x0031, // DIGIT ONE
	[0x32] = 0x0032, // DIGIT TWO
	[0x33] = 0x0033, // DIGIT THREE
	[0x34] = 0x0034, // DIGIT FOUR
	[0x35] = 0x0035, // DIGIT FIVE
	[0x36] = 0x0036, // DIGIT SIX
	[0x37] = 0x0037, // DIGIT SEVEN
	[0x38] = 0x0038, // DIGIT EIGHT
	[0x39] = 0x0039, // DIGIT NINE
	[0x3A] = 0x003A, // COLON
	[0x3B] = 0x003B, // SEMICOLON
	[0x3C] = 0x003C, // LESS-THAN SIGN
	[0x3D] = 0x003D, // EQUALS SIGN
	[0x3E] = 0x003E, // GREATER-THAN SIGN
	[0x3F] = 0x003F, // QUESTION MARK
	[0x40] = 0x0040, // COMMERCIAL AT
	[0x41] = 0x0041, // LATIN CAPITAL LETTER A
	[0x42] = 0x0042, // LATIN CAPITAL LETTER B
	[0x43] = 0x0043, // LATIN CAPITAL LETTER C
	[0x44] = 0x0044, // LATIN CAPITAL LETTER D
	[0x45] = 0x0045, // LATIN CAPITAL LETTER E
	[0x46] = 0x0046, // LATIN CAPITAL LETTER F
	[0x47] = 0x0047, // LATIN CAPITAL LETTER G
	[0x48] = 0x0048, // LATIN CAPITAL LETTER H
	[0x49] = 0x0049, // LATIN CAPITAL LETTER I
	[0x4A] = 0x004A, // LATIN CAPITAL LETTER J
	[0x4B] = 0x004B, // LATIN CAPITAL LETTER K
	[0x4C] = 0x004C, // LATIN CAPITAL LETTER L
	[0x4D] = 0x004D, // LATIN CAPITAL LETTER M
	[0x4E] = 0x004E, // LATIN CAPITAL LETTER N
	[0x4F] = 0x004F, // LATIN CAPITAL LETTER O
	[0x50] = 0x0050, // LATIN CAPITAL LETTER P
	[0x51] = 0x0051, // LATIN CAPITAL LETTER Q
	[0x52] = 0x0052, // LATIN CAPITAL LETTER R
	[0x53] = 0x0053, // LATIN CAPITAL LETTER S
	[0x54] = 0x0054, // LATIN CAPITAL LETTER T
	[0x55] = 0x0055, // LATIN CAPITAL LETTER U
	[0x56] = 0x0056, // LATIN CAPITAL LETTER V
	[0x57] = 0x0057, // LATIN CAPITAL LETTER W
	[0x58] = 0x0058, // LATIN CAPITAL LETTER X
	[0x59] = 0x0059, // LATIN CAPITAL LETTER Y
	[0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
	[0x5B] = 0x005B, // LEFT SQUARE BRACKET
	[0x5C] = 0x005C, // REVERSE SOLIDUS
	[0x5D] = 0x005D, // RIGHT SQUARE BRACKET
	[0x5E] = 0x005E, // CIRCUMFLEX ACCENT
	[0x5F] = 0x005F, // LOW LINE
	[0x60] = 0x0060, // GRAVE ACCENT
	[0x61] = 0x0061, // LATIN SMALL LETTER A
	[0x62] = 0x0062, // LATIN SMALL LETTER B
	[0x63] = 0x0063, // LATIN SMALL LETTER C
	[0x64] = 0x0064, // LATIN SMALL LETTER D
	[0x65] = 0x0065, // LATIN SMALL LETTER E
	[0x66] = 0x0066, // LATIN SMALL LETTER F
	[0x67] = 0x0067, // LATIN SMALL LETTER G
	[0x68] = 0x0068, // LATIN SMALL LETTER H
	[0x69] = 0x0069, // LATIN SMALL LETTER I
	[0x6A] = 0x006A, // LATIN SMALL LETTER J
	[0x6B] = 0x006B, // LATIN SMALL LETTER K
	[0x6C] = 0x006C, // LATIN SMALL LETTER L
	[0x6D] = 0x006D, // LATIN SMALL LETTER M
	[0x6E] = 0x006E, // LATIN SMALL LETTER N
	[0x6F] = 0x006F, // LATIN SMALL LETTER O
	[0x70] = 0x0070, // LATIN SMALL LETTER P
	[0x71] = 0x0071, // LATIN SMALL LETTER Q
	[0x72] = 0x0072, // LATIN SMALL LETTER R
	[0x73] = 0x0073, // LATIN SMALL LETTER S
	[0x74] = 0x0074, // LATIN SMALL LETTER T
	[0x75] = 0x0075, // LATIN SMALL LETTER U
	[0x76] = 0x0076, // LATIN SMALL LETTER V
	[0x77] = 0x0077, // LATIN SMALL LETTER W
	[0x78] = 0x0078, // LATIN SMALL LETTER X
	[0x79] = 0x0079, // LATIN SMALL LETTER Y
	[0x7A] = 0x007A, // LATIN SMALL LETTER Z
	[0x7B] = 0x007B, // LEFT CURLY BRACKET
	[0x7C] = 0x007C, // VERTICAL LINE
	[0x7D] = 0x007D, // RIGHT CURLY BRACKET
	[0x7E] = 0x007E, // TILDE
	[0x7F] = 0x007F, // DELETE
	[0x80] = 0x0080, // <control>
	[0x81] = 0x0081, // <control>
	[0x82] = 0x0082, // <control>
	[0x83] = 0x0083, // <control>
	[0x84] = 0x0084, // <control>
	[0x85] = 0x0085, // <control>
	[0x86] = 0x0086, // <control>
	[0x87] = 0x0087, // <control>
	[0x88] = 0x0088, // <control>
	[0x89] = 0x0089, // <control>
	[0x8A] = 0x008A, // <control>
	[0x8B] = 0x008B, // <control>
	[0x8C] = 0x008C, // <control>
	[0x8D] = 0x008D, // <control>
	[0x8E] = 0x008E, // <control>
	[0x8F] = 0x008F, // <control>
	[0x90] = 0x0090, // <control>
	[0x91] = 0x0091, // <control>
	[0x92] = 0x0092, // <control>
	[0x93] = 0x0093, // <control>
	[0x94] = 0x0094, // <control>
	[0x95] = 0x0095, // <control>
	[0x96] = 0x0096, // <control>
	[0x97] = 0x0097, // <control>
	[0x98] = 0x0098, // <control>
	[0x99] = 0x0099, // <control>
	[0x9A] = 0x009A, // <control>
	[0x9B] = 0x009B, // <control>
	[0x9C] = 0x009C, // <control>
	[0x9D] = 0x009D, // <control>
	[0x9E] = 0x009E, // <control>
	[0x9F] = 0x009F, // <control>
	[0xA0] = 0x00A0, // NO-BREAK SPACE
	[0xA1] = 0x0104, // LATIN CAPITAL LETTER A WITH OGONEK
	[0xA2] = 0x02D8, // BREVE
	[0xA3] = 0x0141, // LATIN CAPITAL LETTER L WITH STROKE
	[0xA4] = 0x00A4, // CURRENCY SIGN
	[0xA5] = 0x013D, // LATIN CAPITAL LETTER L WITH CARON
	[0xA6] = 0x015A, // LATIN CAPITAL LETTER S WITH ACUTE
	[0xA7] = 0x00A7, // SECTION SIGN
	[0xA8] = 0x00A8, // DIAERESIS
	[0xA9] = 0x0160, // LATIN CAPITAL LETTER S WITH CARON
	[0xAA] = 0x015E, // LATIN CAPITAL LETTER S WITH CEDILLA
	[0xAB] = 0x0164, // LATIN CAPITAL LETTER T WITH CARON
	[0xAC] = 0x0179, // LATIN CAPITAL LETTER Z WITH ACUTE
	[0xAD] = 0x00AD, // SOFT HYPHEN
	[0xAE] = 0x017D, // LATIN CAPITAL LETTER Z WITH CARON
	[0xAF] = 0x017B, // LATIN CAPITAL LETTER Z WITH DOT ABOVE
	[0xB0] = 0x00B0, // DEGREE SIGN
	[0xB1] = 0x0105, // LATIN SMALL LETTER A WITH OGONEK
	[0xB2] = 0x02DB, // OGONEK
	[0xB3] = 0x0142, // LATIN SMALL LETTER L WITH STROKE
	[0xB4] = 0x00B4, // ACUTE ACCENT
	[0xB5] = 0x013E, // LATIN SMALL LETTER L WITH CARON
	[0xB6] = 0x015B, // LATIN SMALL LETTER S WITH ACUTE
	[0xB7] = 0x02C7, // CARON
	[0xB8] = 0x00B8, // CEDILLA
	[0xB9] = 0x0161, // LATIN SMALL LETTER S WITH CARON
	[0xBA] = 0x015F, // LATIN SMALL LETTER S WITH CEDILLA
	[0xBB] = 0x0165, // LATIN SMALL LETTER T WITH CARON
	[0xBC] = 0x017A, // LATIN SMALL LETTER Z WITH ACUTE
	[0xBD] = 0x02DD, // DOUBLE ACUTE ACCENT
	[0xBE] = 0x017E, // LATIN SMALL LETTER Z WITH CARON
	[0xBF] = 0x017C, // LATIN SMALL LETTER Z WITH DOT ABOVE
	[0xC0] = 0x0154, // LATIN CAPITAL LETTER R WITH ACUTE
	[0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
	[0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
	[0xC3] = 0x0102, // LATIN CAPITAL LETTER A WITH BREVE
	[0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
	[0xC5] = 0x0139, // LATIN CAPITAL LETTER L WITH ACUTE
	[0xC6] = 0x0106, // LATIN CAPITAL LETTER C WITH ACUTE
	[0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
	[0xC8] = 0x010C, // LATIN CAPITAL LETTER C WITH CARON
	[0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
	[0xCA] = 0x0118, // LATIN CAPITAL LETTER E WITH OGONEK
	[0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
	[0xCC] = 0x011A, // LATIN CAPITAL LETTER E WITH CARON
	[0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
	[0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
	[0xCF] = 0x010E, // LATIN CAPITAL LETTER D WITH CARON
	[0xD0] = 0x0110, // LATIN CAPITAL LETTER D WITH STROKE
	[0xD1] = 0x0143, // LATIN CAPITAL LETTER N WITH ACUTE
	[0xD2] = 0x0147, // LATIN CAPITAL LETTER N WITH CARON
	[0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
	[0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
	[0xD5] = 0x0150, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
	[0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
	[0xD7] = 0x00D7, // MULTIPLICATION SIGN
	[0xD8] = 0x0158, // LATIN CAPITAL LETTER R WITH CARON
	[0xD9] = 0x016E, // LATIN CAPITAL LETTER U WITH RING ABOVE
	[0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
	[0xDB] = 0x0170, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
	[0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
	[0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
	[0xDE] = 0x0162, // LATIN CAPITAL LETTER T WITH CEDILLA
	[0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S
	[0xE0] = 0x0155, // LATIN SMALL LETTER R WITH ACUTE
	[0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
	[0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
	[0xE3] = 0x0103, // LATIN SMALL LETTER A WITH BREVE
	[0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
	[0xE5] = 0x013A, // LATIN SMALL LETTER L WITH ACUTE
	[0xE6] = 0x0107, // LATIN SMALL LETTER C WITH ACUTE
	[0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
	[0xE8] = 0x010D, // LATIN SMALL LETTER C WITH CARON
	[0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
	[0xEA] = 0x0119, // LATIN SMALL LETTER E WITH OGONEK
	[0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
	[0xEC] = 0x011B, // LATIN SMALL LETTER E WITH CARON
	[0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
	[0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
	[0xEF] = 0x010F, // LATIN SMALL LETTER D WITH CARON
	[0xF0] = 0x0111, // LATIN SMALL LETTER D WITH STROKE
	[0xF1] = 0x0144, // LATIN SMALL LETTER N WITH ACUTE
	[0xF2] = 0x0148, // LATIN SMALL LETTER N WITH CARON
	[0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
	[0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
	[0xF5] = 0x0151, // LATIN SMALL LETTER O WITH DOUBLE ACUTE
	[0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
	[0xF7] = 0x00F7, // DIVISION SIGN
	[0xF8] = 0x0159, // LATIN SMALL LETTER R WITH CARON
	[0xF9] = 0x016F, // LATIN SMALL LETTER U WITH RING ABOVE
	[0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
	[0xFB] = 0x0171, // LATIN SMALL LETTER U WITH DOUBLE ACUTE
	[0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
	[0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
	[0xFE] = 0x0163, // LATIN SMALL LETTER T WITH CEDILLA
	[0xFF] = 0x02D9, // DOT ABOVE
};
/*
 * ISO-8859-15 (Latin-9) byte -> Unicode code point table.
 *
 * Indexed by the raw 8-bit value.  Every position maps to the code point
 * of the same value except eight: 0xA4 (EURO SIGN), 0xA6, 0xA8, 0xB4,
 * 0xB8 (S/Z WITH CARON), 0xBC, 0xBD (LIGATURE OE) and 0xBE
 * (Y WITH DIAERESIS).
 *
 * The bound is spelled out as 256 so that an index masked with 0xff is
 * always inside the array, and so that an entry accidentally added past
 * 0xFF is rejected at compile time instead of silently growing the table.
 */
static const wchar_t ISO_8859_15_UNICODE_TABLE[256] = {
	[0x00] = 0x0000, // NULL
	[0x01] = 0x0001, // START OF HEADING
	[0x02] = 0x0002, // START OF TEXT
	[0x03] = 0x0003, // END OF TEXT
	[0x04] = 0x0004, // END OF TRANSMISSION
	[0x05] = 0x0005, // ENQUIRY
	[0x06] = 0x0006, // ACKNOWLEDGE
	[0x07] = 0x0007, // BELL
	[0x08] = 0x0008, // BACKSPACE
	[0x09] = 0x0009, // HORIZONTAL TABULATION
	[0x0A] = 0x000A, // LINE FEED
	[0x0B] = 0x000B, // VERTICAL TABULATION
	[0x0C] = 0x000C, // FORM FEED
	[0x0D] = 0x000D, // CARRIAGE RETURN
	[0x0E] = 0x000E, // SHIFT OUT
	[0x0F] = 0x000F, // SHIFT IN
	[0x10] = 0x0010, // DATA LINK ESCAPE
	[0x11] = 0x0011, // DEVICE CONTROL ONE
	[0x12] = 0x0012, // DEVICE CONTROL TWO
	[0x13] = 0x0013, // DEVICE CONTROL THREE
	[0x14] = 0x0014, // DEVICE CONTROL FOUR
	[0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
	[0x16] = 0x0016, // SYNCHRONOUS IDLE
	[0x17] = 0x0017, // END OF TRANSMISSION BLOCK
	[0x18] = 0x0018, // CANCEL
	[0x19] = 0x0019, // END OF MEDIUM
	[0x1A] = 0x001A, // SUBSTITUTE
	[0x1B] = 0x001B, // ESCAPE
	[0x1C] = 0x001C, // FILE SEPARATOR
	[0x1D] = 0x001D, // GROUP SEPARATOR
	[0x1E] = 0x001E, // RECORD SEPARATOR
	[0x1F] = 0x001F, // UNIT SEPARATOR
	[0x20] = 0x0020, // SPACE
	[0x21] = 0x0021, // EXCLAMATION MARK
	[0x22] = 0x0022, // QUOTATION MARK
	[0x23] = 0x0023, // NUMBER SIGN
	[0x24] = 0x0024, // DOLLAR SIGN
	[0x25] = 0x0025, // PERCENT SIGN
	[0x26] = 0x0026, // AMPERSAND
	[0x27] = 0x0027, // APOSTROPHE
	[0x28] = 0x0028, // LEFT PARENTHESIS
	[0x29] = 0x0029, // RIGHT PARENTHESIS
	[0x2A] = 0x002A, // ASTERISK
	[0x2B] = 0x002B, // PLUS SIGN
	[0x2C] = 0x002C, // COMMA
	[0x2D] = 0x002D, // HYPHEN-MINUS
	[0x2E] = 0x002E, // FULL STOP
	[0x2F] = 0x002F, // SOLIDUS
	[0x30] = 0x0030, // DIGIT ZERO
	[0x31] = 0x0031, // DIGIT ONE
	[0x32] = 0x0032, // DIGIT TWO
	[0x33] = 0x0033, // DIGIT THREE
	[0x34] = 0x0034, // DIGIT FOUR
	[0x35] = 0x0035, // DIGIT FIVE
	[0x36] = 0x0036, // DIGIT SIX
	[0x37] = 0x0037, // DIGIT SEVEN
	[0x38] = 0x0038, // DIGIT EIGHT
	[0x39] = 0x0039, // DIGIT NINE
	[0x3A] = 0x003A, // COLON
	[0x3B] = 0x003B, // SEMICOLON
	[0x3C] = 0x003C, // LESS-THAN SIGN
	[0x3D] = 0x003D, // EQUALS SIGN
	[0x3E] = 0x003E, // GREATER-THAN SIGN
	[0x3F] = 0x003F, // QUESTION MARK
	[0x40] = 0x0040, // COMMERCIAL AT
	[0x41] = 0x0041, // LATIN CAPITAL LETTER A
	[0x42] = 0x0042, // LATIN CAPITAL LETTER B
	[0x43] = 0x0043, // LATIN CAPITAL LETTER C
	[0x44] = 0x0044, // LATIN CAPITAL LETTER D
	[0x45] = 0x0045, // LATIN CAPITAL LETTER E
	[0x46] = 0x0046, // LATIN CAPITAL LETTER F
	[0x47] = 0x0047, // LATIN CAPITAL LETTER G
	[0x48] = 0x0048, // LATIN CAPITAL LETTER H
	[0x49] = 0x0049, // LATIN CAPITAL LETTER I
	[0x4A] = 0x004A, // LATIN CAPITAL LETTER J
	[0x4B] = 0x004B, // LATIN CAPITAL LETTER K
	[0x4C] = 0x004C, // LATIN CAPITAL LETTER L
	[0x4D] = 0x004D, // LATIN CAPITAL LETTER M
	[0x4E] = 0x004E, // LATIN CAPITAL LETTER N
	[0x4F] = 0x004F, // LATIN CAPITAL LETTER O
	[0x50] = 0x0050, // LATIN CAPITAL LETTER P
	[0x51] = 0x0051, // LATIN CAPITAL LETTER Q
	[0x52] = 0x0052, // LATIN CAPITAL LETTER R
	[0x53] = 0x0053, // LATIN CAPITAL LETTER S
	[0x54] = 0x0054, // LATIN CAPITAL LETTER T
	[0x55] = 0x0055, // LATIN CAPITAL LETTER U
	[0x56] = 0x0056, // LATIN CAPITAL LETTER V
	[0x57] = 0x0057, // LATIN CAPITAL LETTER W
	[0x58] = 0x0058, // LATIN CAPITAL LETTER X
	[0x59] = 0x0059, // LATIN CAPITAL LETTER Y
	[0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
	[0x5B] = 0x005B, // LEFT SQUARE BRACKET
	[0x5C] = 0x005C, // REVERSE SOLIDUS
	[0x5D] = 0x005D, // RIGHT SQUARE BRACKET
	[0x5E] = 0x005E, // CIRCUMFLEX ACCENT
	[0x5F] = 0x005F, // LOW LINE
	[0x60] = 0x0060, // GRAVE ACCENT
	[0x61] = 0x0061, // LATIN SMALL LETTER A
	[0x62] = 0x0062, // LATIN SMALL LETTER B
	[0x63] = 0x0063, // LATIN SMALL LETTER C
	[0x64] = 0x0064, // LATIN SMALL LETTER D
	[0x65] = 0x0065, // LATIN SMALL LETTER E
	[0x66] = 0x0066, // LATIN SMALL LETTER F
	[0x67] = 0x0067, // LATIN SMALL LETTER G
	[0x68] = 0x0068, // LATIN SMALL LETTER H
	[0x69] = 0x0069, // LATIN SMALL LETTER I
	[0x6A] = 0x006A, // LATIN SMALL LETTER J
	[0x6B] = 0x006B, // LATIN SMALL LETTER K
	[0x6C] = 0x006C, // LATIN SMALL LETTER L
	[0x6D] = 0x006D, // LATIN SMALL LETTER M
	[0x6E] = 0x006E, // LATIN SMALL LETTER N
	[0x6F] = 0x006F, // LATIN SMALL LETTER O
	[0x70] = 0x0070, // LATIN SMALL LETTER P
	[0x71] = 0x0071, // LATIN SMALL LETTER Q
	[0x72] = 0x0072, // LATIN SMALL LETTER R
	[0x73] = 0x0073, // LATIN SMALL LETTER S
	[0x74] = 0x0074, // LATIN SMALL LETTER T
	[0x75] = 0x0075, // LATIN SMALL LETTER U
	[0x76] = 0x0076, // LATIN SMALL LETTER V
	[0x77] = 0x0077, // LATIN SMALL LETTER W
	[0x78] = 0x0078, // LATIN SMALL LETTER X
	[0x79] = 0x0079, // LATIN SMALL LETTER Y
	[0x7A] = 0x007A, // LATIN SMALL LETTER Z
	[0x7B] = 0x007B, // LEFT CURLY BRACKET
	[0x7C] = 0x007C, // VERTICAL LINE
	[0x7D] = 0x007D, // RIGHT CURLY BRACKET
	[0x7E] = 0x007E, // TILDE
	[0x7F] = 0x007F, // DELETE
	[0x80] = 0x0080, // <control>
	[0x81] = 0x0081, // <control>
	[0x82] = 0x0082, // <control>
	[0x83] = 0x0083, // <control>
	[0x84] = 0x0084, // <control>
	[0x85] = 0x0085, // <control>
	[0x86] = 0x0086, // <control>
	[0x87] = 0x0087, // <control>
	[0x88] = 0x0088, // <control>
	[0x89] = 0x0089, // <control>
	[0x8A] = 0x008A, // <control>
	[0x8B] = 0x008B, // <control>
	[0x8C] = 0x008C, // <control>
	[0x8D] = 0x008D, // <control>
	[0x8E] = 0x008E, // <control>
	[0x8F] = 0x008F, // <control>
	[0x90] = 0x0090, // <control>
	[0x91] = 0x0091, // <control>
	[0x92] = 0x0092, // <control>
	[0x93] = 0x0093, // <control>
	[0x94] = 0x0094, // <control>
	[0x95] = 0x0095, // <control>
	[0x96] = 0x0096, // <control>
	[0x97] = 0x0097, // <control>
	[0x98] = 0x0098, // <control>
	[0x99] = 0x0099, // <control>
	[0x9A] = 0x009A, // <control>
	[0x9B] = 0x009B, // <control>
	[0x9C] = 0x009C, // <control>
	[0x9D] = 0x009D, // <control>
	[0x9E] = 0x009E, // <control>
	[0x9F] = 0x009F, // <control>
	[0xA0] = 0x00A0, // NO-BREAK SPACE
	[0xA1] = 0x00A1, // INVERTED EXCLAMATION MARK
	[0xA2] = 0x00A2, // CENT SIGN
	[0xA3] = 0x00A3, // POUND SIGN
	[0xA4] = 0x20AC, // EURO SIGN
	[0xA5] = 0x00A5, // YEN SIGN
	[0xA6] = 0x0160, // LATIN CAPITAL LETTER S WITH CARON
	[0xA7] = 0x00A7, // SECTION SIGN
	[0xA8] = 0x0161, // LATIN SMALL LETTER S WITH CARON
	[0xA9] = 0x00A9, // COPYRIGHT SIGN
	[0xAA] = 0x00AA, // FEMININE ORDINAL INDICATOR
	[0xAB] = 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
	[0xAC] = 0x00AC, // NOT SIGN
	[0xAD] = 0x00AD, // SOFT HYPHEN
	[0xAE] = 0x00AE, // REGISTERED SIGN
	[0xAF] = 0x00AF, // MACRON
	[0xB0] = 0x00B0, // DEGREE SIGN
	[0xB1] = 0x00B1, // PLUS-MINUS SIGN
	[0xB2] = 0x00B2, // SUPERSCRIPT TWO
	[0xB3] = 0x00B3, // SUPERSCRIPT THREE
	[0xB4] = 0x017D, // LATIN CAPITAL LETTER Z WITH CARON
	[0xB5] = 0x00B5, // MICRO SIGN
	[0xB6] = 0x00B6, // PILCROW SIGN
	[0xB7] = 0x00B7, // MIDDLE DOT
	[0xB8] = 0x017E, // LATIN SMALL LETTER Z WITH CARON
	[0xB9] = 0x00B9, // SUPERSCRIPT ONE
	[0xBA] = 0x00BA, // MASCULINE ORDINAL INDICATOR
	[0xBB] = 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
	[0xBC] = 0x0152, // LATIN CAPITAL LIGATURE OE
	[0xBD] = 0x0153, // LATIN SMALL LIGATURE OE
	[0xBE] = 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
	[0xBF] = 0x00BF, // INVERTED QUESTION MARK
	[0xC0] = 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
	[0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
	[0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
	[0xC3] = 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
	[0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
	[0xC5] = 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
	[0xC6] = 0x00C6, // LATIN CAPITAL LETTER AE
	[0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
	[0xC8] = 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
	[0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
	[0xCA] = 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
	[0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
	[0xCC] = 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
	[0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
	[0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
	[0xCF] = 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
	[0xD0] = 0x00D0, // LATIN CAPITAL LETTER ETH
	[0xD1] = 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
	[0xD2] = 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
	[0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
	[0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
	[0xD5] = 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
	[0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
	[0xD7] = 0x00D7, // MULTIPLICATION SIGN
	[0xD8] = 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
	[0xD9] = 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
	[0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
	[0xDB] = 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
	[0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
	[0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
	[0xDE] = 0x00DE, // LATIN CAPITAL LETTER THORN
	[0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S
	[0xE0] = 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
	[0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
	[0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
	[0xE3] = 0x00E3, // LATIN SMALL LETTER A WITH TILDE
	[0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
	[0xE5] = 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
	[0xE6] = 0x00E6, // LATIN SMALL LETTER AE
	[0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
	[0xE8] = 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
	[0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
	[0xEA] = 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
	[0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
	[0xEC] = 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
	[0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
	[0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
	[0xEF] = 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
	[0xF0] = 0x00F0, // LATIN SMALL LETTER ETH
	[0xF1] = 0x00F1, // LATIN SMALL LETTER N WITH TILDE
	[0xF2] = 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
	[0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
	[0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
	[0xF5] = 0x00F5, // LATIN SMALL LETTER O WITH TILDE
	[0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
	[0xF7] = 0x00F7, // DIVISION SIGN
	[0xF8] = 0x00F8, // LATIN SMALL LETTER O WITH STROKE
	[0xF9] = 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
	[0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
	[0xFB] = 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
	[0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
	[0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
	[0xFE] = 0x00FE, // LATIN SMALL LETTER THORN
	[0xFF] = 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
};
/*
 * Key map for charset panes.  It is allocated in edlib_init() and filled
 * there with the "doc:char" and "doc:content" handlers.
 */
1318 static struct map *charset_map safe;
/*
 * NOTE(review): DEF_LOOKUP_CMD is defined outside this file; presumably it
 * declares charset_handle as a command that dispatches through
 * charset_map.  charset_handle.c is what the *_attach commands pass to
 * pane_register().
 */
1319 DEF_LOOKUP_CMD(charset_handle, charset_map);
/*
 * "doc:char" handler for a charset pane (registered in edlib_init()).
 * Asks the parent pane for a byte with "doc:byte", passing the request
 * arguments through, and maps the byte through the table stored as this
 * pane's data.
 *
 * NOTE(review): this listing appears to have lost its short lines - the
 * braces, the declaration of 'ret' and the final return are not visible.
 * Confirm against the original file.
 */
1321 DEF_CMD(charset_char)
/* Table that was handed to pane_register() when the pane was attached. */
1324 const wchar_t *tbl = ci->home->data;
/* Re-issue the request to the parent as a byte-level request. */
1326 ret = home_call(ci->home->parent, "doc:byte", ci->focus,
1327 ci->num, ci->mark, NULL,
1328 ci->num2, ci->mark2);
/*
 * Translate only when no second mark was supplied and the parent returned
 * a positive result other than WEOF.  The mask keeps the index inside the
 * 256 entries of the table.
 */
1330 if (!ci->mark2 && ret != CHAR_RET(WEOF) && ret >0)
1331 ret = CHAR_RET(tbl[ret & 0xff]);
/*
 * Members of the per-call state used by charset_content_cb (presumably
 * struct charsetcb, judging by the container_of() there).
 *
 * NOTE(review): the struct header, its closing brace and the 'c' and
 * 'noalloc' members referenced by charset_content_cb are not visible in
 * this listing.
 */
1337 struct command *cb safe; /* command invoked via comm_call() with the translated content */
1338 struct pane *p safe; /* pane handed to cb as the third comm_call() argument */
1340 const wchar_t *tbl safe; /* byte -> code point table used for the translation */
/*
 * Callback that charset_content installs between the parent's content
 * stream and the command stored in c->cb.
 *
 * The leading value ci->num is always passed on mapped through c->tbl.
 * If there is no extra string (ci->str is NULL or ci->num2 <= 0), or
 * c->noalloc is set, the call is forwarded with ci->str unchanged.
 * Otherwise each byte of ci->str is mapped through c->tbl and appended to
 * a buffer (sized at twice ci->num2 and limited to 1M), and that buffer is
 * passed on in place of ci->str.  The value returned by c->cb is then
 * compared with the decoded length; when only part was consumed the bytes
 * are re-appended until the buffer reaches rv-1, to map the count back to
 * source bytes.
 *
 * NOTE(review): this listing has lost its short lines - braces, local
 * declarations, several statements and the return values of the final
 * branches are not visible.  The terminator of the "Buffer for utf8
 * content" comment is also missing, so as listed that comment runs on
 * until the end of the "None of the extra was consumed" comment.  Confirm
 * against the original file before relying on any detail here.
 */
1343 DEF_CB(charset_content_cb)
1345 struct charsetcb *c = container_of(ci->comm, struct charsetcb, c);
1349 if (!ci->str || ci->num2 <= 0 || c->noalloc)
1350 return comm_call(c->cb, ci->key, c->p,
1351 c->tbl[ci->num & 0xff], ci->mark, ci->str,
1352 ci->num2, NULL, NULL,
1354 /* Buffer for utf8 content could be as much as 4 times ->str,
1355 * but that is unlikely. Allocate room for double, up to 1M.
1357 bsize = ci->num2 * 2;
1358 if (bsize > 1024*1024)
1361 buf_resize(&b, bsize);
1362 for (i = 0; i < ci->num2 && b.len < 1024*1024-2; i++) {
1363 unsigned char cc = ci->str[i];
1364 buf_append(&b, c->tbl[cc]);
1366 rv = comm_call(c->cb, ci->key, c->p,
1367 c->tbl[ci->num & 0xff], ci->mark,
1368 buf_final(&b), b.len,
1369 NULL, NULL, ci->x, 0);
1371 /* None of the extra was consumed. Assume that will continue */
1376 if (rv >= b.len + 1) {
1377 /* All of the extra (that we decoded) was consumed */
1381 /* Only some was consumed. We needed to map back to number of bytes. */
1383 for (i = 0; i < ci->num2 && b.len < (rv-1); i++)
1384 buf_append(&b, c->tbl[(unsigned char)ci->str[i]]);
/*
 * "doc:content" handler for a charset pane (registered in edlib_init()).
 * Requires both ci->comm2 and ci->mark.  It installs charset_content_cb
 * as the callback and re-issues the same key to the parent pane, so the
 * content coming back is seen by charset_content_cb first.
 *
 * NOTE(review): the braces, the declaration and remaining initialisation
 * of 'c', and the action taken when comm2/mark is missing are not visible
 * in this listing.
 */
1389 DEF_CMD(charset_content)
/* Table that was handed to pane_register() when the pane was attached. */
1392 const wchar_t *tbl = ci->home->data;
1394 if (!ci->comm2 || !ci->mark)
1397 c.c = charset_content_cb;
/* Same key, sent to the parent with our callback in place of the caller's. */
1402 return home_call_comm(ci->home->parent, ci->key, ci->home,
1403 &c.c, 0, ci->mark, NULL, 0, ci->mark2);
/*
 * Shared body of the charset-to-utf8-* commands.
 *
 * ci:  command info; needs a source string and ci->comm2.
 * tbl: byte -> code point table for the charset, indexed with the byte
 *      masked to 0..255.
 *
 * Each source byte is mapped through tbl and appended to a buffer, and
 * the finished buffer is delivered by calling ci->comm2 with key "cb".
 *
 * NOTE(review): the braces, local declarations (including where 's' comes
 * from - presumably ci->str), the loop header and the return values are
 * not visible in this listing.
 */
1406 static int charset_to_utf8(const struct cmd_info *ci safe, const wchar_t tbl[])
1412 if (!s || !ci->comm2)
1416 buf_append(&b, tbl[*s & 0xff]);
1419 comm_call(ci->comm2, "cb", ci->focus, 0, NULL, buf_final(&b));
/*
 * Convert a string using the Windows-1251 table.  edlib_init() registers
 * this under the prefix "charset-to-utf8-windows-".
 * NOTE(review): the braces are not visible in this listing.
 */
1424 DEF_CMD(win1251_to_utf8)
1426 return charset_to_utf8(ci, WIN1251_UNICODE_TABLE);
/*
 * Convert a string using the Windows-1252 table.  edlib_init() registers
 * this as "charset-to-utf8-windows-1252".
 * NOTE(review): the braces are not visible in this listing.
 */
1429 DEF_CMD(win1252_to_utf8)
1431 return charset_to_utf8(ci, WIN1252_UNICODE_TABLE);
/*
 * Convert a string using the ISO-8859-1 table.  edlib_init() registers
 * this as "charset-to-utf8-iso-8859-1".
 * NOTE(review): the braces are not visible in this listing.
 */
1434 DEF_CMD(iso8859_1_to_utf8)
1436 return charset_to_utf8(ci, ISO_8859_1_UNICODE_TABLE);
/*
 * Convert a string using the ISO-8859-2 table.  edlib_init() registers
 * this as "charset-to-utf8-iso-8859-2".
 * NOTE(review): the braces are not visible in this listing.
 */
1439 DEF_CMD(iso8859_2_to_utf8)
1441 return charset_to_utf8(ci, ISO_8859_2_UNICODE_TABLE);
/*
 * Convert a string using the ISO-8859-15 table.  edlib_init() registers
 * this under the prefix "charset-to-utf8-iso-8859-" and as
 * "charset-to-utf8-us-ascii".
 * NOTE(review): the braces are not visible in this listing.
 */
1444 DEF_CMD(iso8859_15_to_utf8)
1446 return charset_to_utf8(ci, ISO_8859_15_UNICODE_TABLE);
/*
 * Attach a Windows-1251 charset pane: register a pane on ci->focus that is
 * handled by charset_handle and carries the Windows-1251 table as its
 * data, then report the new pane through ci->comm2 with key "cb".
 * edlib_init() registers this under the prefix "attach-charset-windows-".
 *
 * NOTE(review): the braces, the declaration of 'p' and whatever sits
 * between the two calls (possibly a failure check) are not visible in
 * this listing.
 */
1449 DEF_CMD(win1251_attach)
1453 p = pane_register(ci->focus, 0, &charset_handle.c,
1454 WIN1251_UNICODE_TABLE);
1458 return comm_call(ci->comm2, "cb", p);
/*
 * Attach a Windows-1252 charset pane: register a pane on ci->focus that is
 * handled by charset_handle and carries the Windows-1252 table as its
 * data, then report the new pane through ci->comm2 with key "cb".
 * edlib_init() registers this as "attach-charset-windows-1252".
 *
 * NOTE(review): the braces, the declaration of 'p' and whatever sits
 * between the two calls (possibly a failure check) are not visible in
 * this listing.
 */
1461 DEF_CMD(win1252_attach)
1465 p = pane_register(ci->focus, 0, &charset_handle.c,
1466 WIN1252_UNICODE_TABLE);
1470 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-1 charset pane: register a pane on ci->focus that is
 * handled by charset_handle and carries the ISO-8859-1 table as its data,
 * then report the new pane through ci->comm2 with key "cb".
 * edlib_init() registers this as "attach-charset-iso-8859-1".
 *
 * NOTE(review): the braces, the declaration of 'p' and whatever sits
 * between the two calls (possibly a failure check) are not visible in
 * this listing.
 */
1473 DEF_CMD(iso8859_1_attach)
1477 p = pane_register(ci->focus, 0, &charset_handle.c,
1478 ISO_8859_1_UNICODE_TABLE);
1482 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-2 charset pane: register a pane on ci->focus that is
 * handled by charset_handle and carries the ISO-8859-2 table as its data,
 * then report the new pane through ci->comm2 with key "cb".
 * edlib_init() registers this as "attach-charset-iso-8859-2".
 *
 * NOTE(review): the braces, the declaration of 'p' and whatever sits
 * between the two calls (possibly a failure check) are not visible in
 * this listing.
 */
1485 DEF_CMD(iso8859_2_attach)
1489 p = pane_register(ci->focus, 0, &charset_handle.c,
1490 ISO_8859_2_UNICODE_TABLE);
1494 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-15 charset pane: register a pane on ci->focus that is
 * handled by charset_handle and carries the ISO-8859-15 table as its data,
 * then report the new pane through ci->comm2 with key "cb".
 * edlib_init() registers this under the prefix "attach-charset-iso-8859-"
 * and as "attach-charset-us-ascii".
 *
 * NOTE(review): the braces, the declaration of 'p' and whatever sits
 * between the two calls (possibly a failure check) are not visible in
 * this listing.
 */
1497 DEF_CMD(iso8859_15_attach)
1501 p = pane_register(ci->focus, 0, &charset_handle.c,
1502 ISO_8859_15_UNICODE_TABLE);
1506 return comm_call(ci->comm2, "cb", p);
/*
 * Module entry point.  Builds the key map used by charset panes and
 * registers the attach-charset-* and charset-to-utf8-* commands on 'ed'.
 *
 * Both a prefix and specific names are registered for the "windows-" and
 * "iso-8859-" families: the prefix registration supplies the fallback
 * table named in the comments below, and the specific names select a
 * dedicated table.
 *
 * NOTE(review): the function's braces are not visible in this listing.
 */
1509 void edlib_init(struct pane *ed safe)
1511 charset_map = key_alloc();
/* Requests that a charset pane answers itself. */
1513 key_add(charset_map, "doc:char", &charset_char);
1514 key_add(charset_map, "doc:content", &charset_content);
1515 /* No doc:content-bytes - that wouldn't make sense */
1517 /* Use 1251 for any unknown 'windows' charset */
1518 call_comm("global-set-command-prefix", ed, &win1251_attach,
1519 0, NULL, "attach-charset-windows-");
1520 call_comm("global-set-command-prefix", ed, &win1251_to_utf8,
1521 0, NULL, "charset-to-utf8-windows-");
/* Windows-1252 has its own table. */
1523 call_comm("global-set-command", ed, &win1252_attach, 0, NULL,
1524 "attach-charset-windows-1252");
1525 call_comm("global-set-command", ed, &win1252_to_utf8, 0, NULL,
1526 "charset-to-utf8-windows-1252");
1528 /* Use iso-8859-15 for any unknown iso-8859, and for ascii */
1529 call_comm("global-set-command-prefix", ed, &iso8859_15_attach,
1530 0, NULL, "attach-charset-iso-8859-");
1531 call_comm("global-set-command-prefix", ed, &iso8859_15_to_utf8,
1532 0, NULL, "charset-to-utf8-iso-8859-");
1534 call_comm("global-set-command", ed, &iso8859_15_attach, 0, NULL,
1535 "attach-charset-us-ascii");
1536 call_comm("global-set-command", ed, &iso8859_15_to_utf8, 0, NULL,
1537 "charset-to-utf8-us-ascii");
/* ISO-8859-1 and ISO-8859-2 have their own tables. */
1539 call_comm("global-set-command", ed, &iso8859_1_attach, 0, NULL,
1540 "attach-charset-iso-8859-1");
1541 call_comm("global-set-command", ed, &iso8859_1_to_utf8, 0, NULL,
1542 "charset-to-utf8-iso-8859-1");
1544 call_comm("global-set-command", ed, &iso8859_2_attach, 0, NULL,
1545 "attach-charset-iso-8859-2");
1546 call_comm("global-set-command", ed, &iso8859_2_to_utf8, 0, NULL,
1547 "charset-to-utf8-iso-8859-2");