2 * Copyright Neil Brown ©2017-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
5 * Filter a view on a document to convert 8-bit characters in various
6 * charsets to the corresponding Unicode characters.
8 * Includes tables transformed from the following mapping files:
9 * https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1251.txt
10 * https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
11 * https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
12 * https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
13 * https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT
21 static const wchar_t WIN1251_UNICODE_TABLE[] = {
22 [0x00] = 0x0000, // Null
23 [0x01] = 0x0001, // Start Of Heading
24 [0x02] = 0x0002, // Start Of Text
25 [0x03] = 0x0003, // End Of Text
26 [0x04] = 0x0004, // End Of Transmission
27 [0x05] = 0x0005, // Enquiry
28 [0x06] = 0x0006, // Acknowledge
29 [0x07] = 0x0007, // Bell
30 [0x08] = 0x0008, // Backspace
31 [0x09] = 0x0009, // Horizontal Tabulation
32 [0x0a] = 0x000a, // Line Feed
33 [0x0b] = 0x000b, // Vertical Tabulation
34 [0x0c] = 0x000c, // Form Feed
35 [0x0d] = 0x000d, // Carriage Return
36 [0x0e] = 0x000e, // Shift Out
37 [0x0f] = 0x000f, // Shift In
38 [0x10] = 0x0010, // Data Link Escape
39 [0x11] = 0x0011, // Device Control One
40 [0x12] = 0x0012, // Device Control Two
41 [0x13] = 0x0013, // Device Control Three
42 [0x14] = 0x0014, // Device Control Four
43 [0x15] = 0x0015, // Negative Acknowledge
44 [0x16] = 0x0016, // Synchronous Idle
45 [0x17] = 0x0017, // End Of Transmission Block
46 [0x18] = 0x0018, // Cancel
47 [0x19] = 0x0019, // End Of Medium
48 [0x1a] = 0x001a, // Substitute
49 [0x1b] = 0x001b, // Escape
50 [0x1c] = 0x001c, // File Separator
51 [0x1d] = 0x001d, // Group Separator
52 [0x1e] = 0x001e, // Record Separator
53 [0x1f] = 0x001f, // Unit Separator
54 [0x20] = 0x0020, // Space
55 [0x21] = 0x0021, // Exclamation Mark
56 [0x22] = 0x0022, // Quotation Mark
57 [0x23] = 0x0023, // Number Sign
58 [0x24] = 0x0024, // Dollar Sign
59 [0x25] = 0x0025, // Percent Sign
60 [0x26] = 0x0026, // Ampersand
61 [0x27] = 0x0027, // Apostrophe
62 [0x28] = 0x0028, // Left Parenthesis
63 [0x29] = 0x0029, // Right Parenthesis
64 [0x2a] = 0x002a, // Asterisk
65 [0x2b] = 0x002b, // Plus Sign
66 [0x2c] = 0x002c, // Comma
67 [0x2d] = 0x002d, // Hyphen-Minus
68 [0x2e] = 0x002e, // Full Stop
69 [0x2f] = 0x002f, // Solidus
70 [0x30] = 0x0030, // Digit Zero
71 [0x31] = 0x0031, // Digit One
72 [0x32] = 0x0032, // Digit Two
73 [0x33] = 0x0033, // Digit Three
74 [0x34] = 0x0034, // Digit Four
75 [0x35] = 0x0035, // Digit Five
76 [0x36] = 0x0036, // Digit Six
77 [0x37] = 0x0037, // Digit Seven
78 [0x38] = 0x0038, // Digit Eight
79 [0x39] = 0x0039, // Digit Nine
80 [0x3a] = 0x003a, // Colon
81 [0x3b] = 0x003b, // Semicolon
82 [0x3c] = 0x003c, // Less-Than Sign
83 [0x3d] = 0x003d, // Equals Sign
84 [0x3e] = 0x003e, // Greater-Than Sign
85 [0x3f] = 0x003f, // Question Mark
86 [0x40] = 0x0040, // Commercial At
87 [0x41] = 0x0041, // Latin Capital Letter A
88 [0x42] = 0x0042, // Latin Capital Letter B
89 [0x43] = 0x0043, // Latin Capital Letter C
90 [0x44] = 0x0044, // Latin Capital Letter D
91 [0x45] = 0x0045, // Latin Capital Letter E
92 [0x46] = 0x0046, // Latin Capital Letter F
93 [0x47] = 0x0047, // Latin Capital Letter G
94 [0x48] = 0x0048, // Latin Capital Letter H
95 [0x49] = 0x0049, // Latin Capital Letter I
96 [0x4a] = 0x004a, // Latin Capital Letter J
97 [0x4b] = 0x004b, // Latin Capital Letter K
98 [0x4c] = 0x004c, // Latin Capital Letter L
99 [0x4d] = 0x004d, // Latin Capital Letter M
100 [0x4e] = 0x004e, // Latin Capital Letter N
101 [0x4f] = 0x004f, // Latin Capital Letter O
102 [0x50] = 0x0050, // Latin Capital Letter P
103 [0x51] = 0x0051, // Latin Capital Letter Q
104 [0x52] = 0x0052, // Latin Capital Letter R
105 [0x53] = 0x0053, // Latin Capital Letter S
106 [0x54] = 0x0054, // Latin Capital Letter T
107 [0x55] = 0x0055, // Latin Capital Letter U
108 [0x56] = 0x0056, // Latin Capital Letter V
109 [0x57] = 0x0057, // Latin Capital Letter W
110 [0x58] = 0x0058, // Latin Capital Letter X
111 [0x59] = 0x0059, // Latin Capital Letter Y
112 [0x5a] = 0x005a, // Latin Capital Letter Z
113 [0x5b] = 0x005b, // Left Square Bracket
114 [0x5c] = 0x005c, // Reverse Solidus
115 [0x5d] = 0x005d, // Right Square Bracket
116 [0x5e] = 0x005e, // Circumflex Accent
117 [0x5f] = 0x005f, // Low Line
118 [0x60] = 0x0060, // Grave Accent
119 [0x61] = 0x0061, // Latin Small Letter A
120 [0x62] = 0x0062, // Latin Small Letter B
121 [0x63] = 0x0063, // Latin Small Letter C
122 [0x64] = 0x0064, // Latin Small Letter D
123 [0x65] = 0x0065, // Latin Small Letter E
124 [0x66] = 0x0066, // Latin Small Letter F
125 [0x67] = 0x0067, // Latin Small Letter G
126 [0x68] = 0x0068, // Latin Small Letter H
127 [0x69] = 0x0069, // Latin Small Letter I
128 [0x6a] = 0x006a, // Latin Small Letter J
129 [0x6b] = 0x006b, // Latin Small Letter K
130 [0x6c] = 0x006c, // Latin Small Letter L
131 [0x6d] = 0x006d, // Latin Small Letter M
132 [0x6e] = 0x006e, // Latin Small Letter N
133 [0x6f] = 0x006f, // Latin Small Letter O
134 [0x70] = 0x0070, // Latin Small Letter P
135 [0x71] = 0x0071, // Latin Small Letter Q
136 [0x72] = 0x0072, // Latin Small Letter R
137 [0x73] = 0x0073, // Latin Small Letter S
138 [0x74] = 0x0074, // Latin Small Letter T
139 [0x75] = 0x0075, // Latin Small Letter U
140 [0x76] = 0x0076, // Latin Small Letter V
141 [0x77] = 0x0077, // Latin Small Letter W
142 [0x78] = 0x0078, // Latin Small Letter X
143 [0x79] = 0x0079, // Latin Small Letter Y
144 [0x7a] = 0x007a, // Latin Small Letter Z
145 [0x7b] = 0x007b, // Left Curly Bracket
146 [0x7c] = 0x007c, // Vertical Line
147 [0x7d] = 0x007d, // Right Curly Bracket
148 [0x7e] = 0x007e, // Tilde
149 [0x7f] = 0x007f, // Delete
150 [0x80] = 0x0402, // Cyrillic Capital Letter Dje
151 [0x81] = 0x0403, // Cyrillic Capital Letter Gje
152 [0x82] = 0x201a, // Single Low-9 Quotation Mark
153 [0x83] = 0x0453, // Cyrillic Small Letter Gje
154 [0x84] = 0x201e, // Double Low-9 Quotation Mark
155 [0x85] = 0x2026, // Horizontal Ellipsis
156 [0x86] = 0x2020, // Dagger
157 [0x87] = 0x2021, // Double Dagger
158 [0x88] = 0x20ac, // Euro Sign
159 [0x89] = 0x2030, // Per Mille Sign
160 [0x8a] = 0x0409, // Cyrillic Capital Letter Lje
161 [0x8b] = 0x2039, // Single Left-Pointing Angle Quotation Mark
162 [0x8c] = 0x040a, // Cyrillic Capital Letter Nje
163 [0x8d] = 0x040c, // Cyrillic Capital Letter Kje
164 [0x8e] = 0x040b, // Cyrillic Capital Letter Tshe
165 [0x8f] = 0x040f, // Cyrillic Capital Letter Dzhe
166 [0x90] = 0x0452, // Cyrillic Small Letter Dje
167 [0x91] = 0x2018, // Left Single Quotation Mark
168 [0x92] = 0x2019, // Right Single Quotation Mark
169 [0x93] = 0x201c, // Left Double Quotation Mark
170 [0x94] = 0x201d, // Right Double Quotation Mark
171 [0x95] = 0x2022, // Bullet
172 [0x96] = 0x2013, // En Dash
173 [0x97] = 0x2014, // Em Dash
174 [0x98] = 0x0098, // ??
175 [0x99] = 0x2122, // Trade Mark Sign
176 [0x9a] = 0x0459, // Cyrillic Small Letter Lje
177 [0x9b] = 0x203a, // Single Right-Pointing Angle Quotation Mark
178 [0x9c] = 0x045a, // Cyrillic Small Letter Nje
179 [0x9d] = 0x045c, // Cyrillic Small Letter Kje
180 [0x9e] = 0x045b, // Cyrillic Small Letter Tshe
181 [0x9f] = 0x045f, // Cyrillic Small Letter Dzhe
182 [0xa0] = 0x00a0, // No-Break Space
183 [0xa1] = 0x040e, // Cyrillic Capital Letter Short U
184 [0xa2] = 0x045e, // Cyrillic Small Letter Short U
185 [0xa3] = 0x0408, // Cyrillic Capital Letter Je
186 [0xa4] = 0x00a4, // Currency Sign
187 [0xa5] = 0x0490, // Cyrillic Capital Letter Ghe With Upturn
188 [0xa6] = 0x00a6, // Broken Bar
189 [0xa7] = 0x00a7, // Section Sign
190 [0xa8] = 0x0401, // Cyrillic Capital Letter Io
191 [0xa9] = 0x00a9, // Copyright Sign
192 [0xaa] = 0x0404, // Cyrillic Capital Letter Ukrainian Ie
193 [0xab] = 0x00ab, // Left-Pointing Double Angle Quotation Mark
194 [0xac] = 0x00ac, // Not Sign
195 [0xad] = 0x00ad, // Soft Hyphen
196 [0xae] = 0x00ae, // Registered Sign
197 [0xaf] = 0x0407, // Cyrillic Capital Letter Yi
198 [0xb0] = 0x00b0, // Degree Sign
199 [0xb1] = 0x00b1, // Plus-Minus Sign
200 [0xb2] = 0x0406, // Cyrillic Capital Letter Byelorussian-Ukrainian I
201 [0xb3] = 0x0456, // Cyrillic Small Letter Byelorussian-Ukrainian I
202 [0xb4] = 0x0491, // Cyrillic Small Letter Ghe With Upturn
203 [0xb5] = 0x00b5, // Micro Sign
204 [0xb6] = 0x00b6, // Pilcrow Sign
205 [0xb7] = 0x00b7, // Middle Dot
206 [0xb8] = 0x0451, // Cyrillic Small Letter Io
207 [0xb9] = 0x2116, // Numero Sign
208 [0xba] = 0x0454, // Cyrillic Small Letter Ukrainian Ie
209 [0xbb] = 0x00bb, // Right-Pointing Double Angle Quotation Mark
210 [0xbc] = 0x0458, // Cyrillic Small Letter Je
211 [0xbd] = 0x0405, // Cyrillic Capital Letter Dze
212 [0xbe] = 0x0455, // Cyrillic Small Letter Dze
213 [0xbf] = 0x0457, // Cyrillic Small Letter Yi
214 [0xc0] = 0x0410, // Cyrillic Capital Letter A
215 [0xc1] = 0x0411, // Cyrillic Capital Letter Be
216 [0xc2] = 0x0412, // Cyrillic Capital Letter Ve
217 [0xc3] = 0x0413, // Cyrillic Capital Letter Ghe
218 [0xc4] = 0x0414, // Cyrillic Capital Letter De
219 [0xc5] = 0x0415, // Cyrillic Capital Letter Ie
220 [0xc6] = 0x0416, // Cyrillic Capital Letter Zhe
221 [0xc7] = 0x0417, // Cyrillic Capital Letter Ze
222 [0xc8] = 0x0418, // Cyrillic Capital Letter I
223 [0xc9] = 0x0419, // Cyrillic Capital Letter Short I
224 [0xca] = 0x041a, // Cyrillic Capital Letter Ka
225 [0xcb] = 0x041b, // Cyrillic Capital Letter El
226 [0xcc] = 0x041c, // Cyrillic Capital Letter Em
227 [0xcd] = 0x041d, // Cyrillic Capital Letter En
228 [0xce] = 0x041e, // Cyrillic Capital Letter O
229 [0xcf] = 0x041f, // Cyrillic Capital Letter Pe
230 [0xd0] = 0x0420, // Cyrillic Capital Letter Er
231 [0xd1] = 0x0421, // Cyrillic Capital Letter Es
232 [0xd2] = 0x0422, // Cyrillic Capital Letter Te
233 [0xd3] = 0x0423, // Cyrillic Capital Letter U
234 [0xd4] = 0x0424, // Cyrillic Capital Letter Ef
235 [0xd5] = 0x0425, // Cyrillic Capital Letter Ha
236 [0xd6] = 0x0426, // Cyrillic Capital Letter Tse
237 [0xd7] = 0x0427, // Cyrillic Capital Letter Che
238 [0xd8] = 0x0428, // Cyrillic Capital Letter Sha
239 [0xd9] = 0x0429, // Cyrillic Capital Letter Shcha
240 [0xda] = 0x042a, // Cyrillic Capital Letter Hard Sign
241 [0xdb] = 0x042b, // Cyrillic Capital Letter Yeru
242 [0xdc] = 0x042c, // Cyrillic Capital Letter Soft Sign
243 [0xdd] = 0x042d, // Cyrillic Capital Letter E
244 [0xde] = 0x042e, // Cyrillic Capital Letter Yu
245 [0xdf] = 0x042f, // Cyrillic Capital Letter Ya
246 [0xe0] = 0x0430, // Cyrillic Small Letter A
247 [0xe1] = 0x0431, // Cyrillic Small Letter Be
248 [0xe2] = 0x0432, // Cyrillic Small Letter Ve
249 [0xe3] = 0x0433, // Cyrillic Small Letter Ghe
250 [0xe4] = 0x0434, // Cyrillic Small Letter De
251 [0xe5] = 0x0435, // Cyrillic Small Letter Ie
252 [0xe6] = 0x0436, // Cyrillic Small Letter Zhe
253 [0xe7] = 0x0437, // Cyrillic Small Letter Ze
254 [0xe8] = 0x0438, // Cyrillic Small Letter I
255 [0xe9] = 0x0439, // Cyrillic Small Letter Short I
256 [0xea] = 0x043a, // Cyrillic Small Letter Ka
257 [0xeb] = 0x043b, // Cyrillic Small Letter El
258 [0xec] = 0x043c, // Cyrillic Small Letter Em
259 [0xed] = 0x043d, // Cyrillic Small Letter En
260 [0xee] = 0x043e, // Cyrillic Small Letter O
261 [0xef] = 0x043f, // Cyrillic Small Letter Pe
262 [0xf0] = 0x0440, // Cyrillic Small Letter Er
263 [0xf1] = 0x0441, // Cyrillic Small Letter Es
264 [0xf2] = 0x0442, // Cyrillic Small Letter Te
265 [0xf3] = 0x0443, // Cyrillic Small Letter U
266 [0xf4] = 0x0444, // Cyrillic Small Letter Ef
267 [0xf5] = 0x0445, // Cyrillic Small Letter Ha
268 [0xf6] = 0x0446, // Cyrillic Small Letter Tse
269 [0xf7] = 0x0447, // Cyrillic Small Letter Che
270 [0xf8] = 0x0448, // Cyrillic Small Letter Sha
271 [0xf9] = 0x0449, // Cyrillic Small Letter Shcha
272 [0xfa] = 0x044a, // Cyrillic Small Letter Hard Sign
273 [0xfb] = 0x044b, // Cyrillic Small Letter Yeru
274 [0xfc] = 0x044c, // Cyrillic Small Letter Soft Sign
275 [0xfd] = 0x044d, // Cyrillic Small Letter E
276 [0xfe] = 0x044e, // Cyrillic Small Letter Yu
277 [0xff] = 0x044f, // Cyrillic Small Letter Ya
// WIN1252_UNICODE_TABLE: lookup table indexed by an 8-bit byte value
// (0x00-0xff) that yields the Unicode code point for that byte in the
// Windows-1252 charset.  Presumably derived from the bestfit1252.txt
// mapping named in the file header.
// Only 0x80-0x9f differ from an identity mapping; within that range the
// five bytes 0x81, 0x8d, 0x8f, 0x90 and 0x9d are marked "??" and map to
// themselves.
280 static const wchar_t WIN1252_UNICODE_TABLE[] = {
// 0x00-0x7f: each byte maps to the code point with the same value.
281 [0x00] = 0x0000, // Null
282 [0x01] = 0x0001, // Start Of Heading
283 [0x02] = 0x0002, // Start Of Text
284 [0x03] = 0x0003, // End Of Text
285 [0x04] = 0x0004, // End Of Transmission
286 [0x05] = 0x0005, // Enquiry
287 [0x06] = 0x0006, // Acknowledge
288 [0x07] = 0x0007, // Bell
289 [0x08] = 0x0008, // Backspace
290 [0x09] = 0x0009, // Horizontal Tabulation
291 [0x0a] = 0x000a, // Line Feed
292 [0x0b] = 0x000b, // Vertical Tabulation
293 [0x0c] = 0x000c, // Form Feed
294 [0x0d] = 0x000d, // Carriage Return
295 [0x0e] = 0x000e, // Shift Out
296 [0x0f] = 0x000f, // Shift In
297 [0x10] = 0x0010, // Data Link Escape
298 [0x11] = 0x0011, // Device Control One
299 [0x12] = 0x0012, // Device Control Two
300 [0x13] = 0x0013, // Device Control Three
301 [0x14] = 0x0014, // Device Control Four
302 [0x15] = 0x0015, // Negative Acknowledge
303 [0x16] = 0x0016, // Synchronous Idle
304 [0x17] = 0x0017, // End Of Transmission Block
305 [0x18] = 0x0018, // Cancel
306 [0x19] = 0x0019, // End Of Medium
307 [0x1a] = 0x001a, // Substitute
308 [0x1b] = 0x001b, // Escape
309 [0x1c] = 0x001c, // File Separator
310 [0x1d] = 0x001d, // Group Separator
311 [0x1e] = 0x001e, // Record Separator
312 [0x1f] = 0x001f, // Unit Separator
313 [0x20] = 0x0020, // Space
314 [0x21] = 0x0021, // Exclamation Mark
315 [0x22] = 0x0022, // Quotation Mark
316 [0x23] = 0x0023, // Number Sign
317 [0x24] = 0x0024, // Dollar Sign
318 [0x25] = 0x0025, // Percent Sign
319 [0x26] = 0x0026, // Ampersand
320 [0x27] = 0x0027, // Apostrophe
321 [0x28] = 0x0028, // Left Parenthesis
322 [0x29] = 0x0029, // Right Parenthesis
323 [0x2a] = 0x002a, // Asterisk
324 [0x2b] = 0x002b, // Plus Sign
325 [0x2c] = 0x002c, // Comma
326 [0x2d] = 0x002d, // Hyphen-Minus
327 [0x2e] = 0x002e, // Full Stop
328 [0x2f] = 0x002f, // Solidus
329 [0x30] = 0x0030, // Digit Zero
330 [0x31] = 0x0031, // Digit One
331 [0x32] = 0x0032, // Digit Two
332 [0x33] = 0x0033, // Digit Three
333 [0x34] = 0x0034, // Digit Four
334 [0x35] = 0x0035, // Digit Five
335 [0x36] = 0x0036, // Digit Six
336 [0x37] = 0x0037, // Digit Seven
337 [0x38] = 0x0038, // Digit Eight
338 [0x39] = 0x0039, // Digit Nine
339 [0x3a] = 0x003a, // Colon
340 [0x3b] = 0x003b, // Semicolon
341 [0x3c] = 0x003c, // Less-Than Sign
342 [0x3d] = 0x003d, // Equals Sign
343 [0x3e] = 0x003e, // Greater-Than Sign
344 [0x3f] = 0x003f, // Question Mark
345 [0x40] = 0x0040, // Commercial At
346 [0x41] = 0x0041, // Latin Capital Letter A
347 [0x42] = 0x0042, // Latin Capital Letter B
348 [0x43] = 0x0043, // Latin Capital Letter C
349 [0x44] = 0x0044, // Latin Capital Letter D
350 [0x45] = 0x0045, // Latin Capital Letter E
351 [0x46] = 0x0046, // Latin Capital Letter F
352 [0x47] = 0x0047, // Latin Capital Letter G
353 [0x48] = 0x0048, // Latin Capital Letter H
354 [0x49] = 0x0049, // Latin Capital Letter I
355 [0x4a] = 0x004a, // Latin Capital Letter J
356 [0x4b] = 0x004b, // Latin Capital Letter K
357 [0x4c] = 0x004c, // Latin Capital Letter L
358 [0x4d] = 0x004d, // Latin Capital Letter M
359 [0x4e] = 0x004e, // Latin Capital Letter N
360 [0x4f] = 0x004f, // Latin Capital Letter O
361 [0x50] = 0x0050, // Latin Capital Letter P
362 [0x51] = 0x0051, // Latin Capital Letter Q
363 [0x52] = 0x0052, // Latin Capital Letter R
364 [0x53] = 0x0053, // Latin Capital Letter S
365 [0x54] = 0x0054, // Latin Capital Letter T
366 [0x55] = 0x0055, // Latin Capital Letter U
367 [0x56] = 0x0056, // Latin Capital Letter V
368 [0x57] = 0x0057, // Latin Capital Letter W
369 [0x58] = 0x0058, // Latin Capital Letter X
370 [0x59] = 0x0059, // Latin Capital Letter Y
371 [0x5a] = 0x005a, // Latin Capital Letter Z
372 [0x5b] = 0x005b, // Left Square Bracket
373 [0x5c] = 0x005c, // Reverse Solidus
374 [0x5d] = 0x005d, // Right Square Bracket
375 [0x5e] = 0x005e, // Circumflex Accent
376 [0x5f] = 0x005f, // Low Line
377 [0x60] = 0x0060, // Grave Accent
378 [0x61] = 0x0061, // Latin Small Letter A
379 [0x62] = 0x0062, // Latin Small Letter B
380 [0x63] = 0x0063, // Latin Small Letter C
381 [0x64] = 0x0064, // Latin Small Letter D
382 [0x65] = 0x0065, // Latin Small Letter E
383 [0x66] = 0x0066, // Latin Small Letter F
384 [0x67] = 0x0067, // Latin Small Letter G
385 [0x68] = 0x0068, // Latin Small Letter H
386 [0x69] = 0x0069, // Latin Small Letter I
387 [0x6a] = 0x006a, // Latin Small Letter J
388 [0x6b] = 0x006b, // Latin Small Letter K
389 [0x6c] = 0x006c, // Latin Small Letter L
390 [0x6d] = 0x006d, // Latin Small Letter M
391 [0x6e] = 0x006e, // Latin Small Letter N
392 [0x6f] = 0x006f, // Latin Small Letter O
393 [0x70] = 0x0070, // Latin Small Letter P
394 [0x71] = 0x0071, // Latin Small Letter Q
395 [0x72] = 0x0072, // Latin Small Letter R
396 [0x73] = 0x0073, // Latin Small Letter S
397 [0x74] = 0x0074, // Latin Small Letter T
398 [0x75] = 0x0075, // Latin Small Letter U
399 [0x76] = 0x0076, // Latin Small Letter V
400 [0x77] = 0x0077, // Latin Small Letter W
401 [0x78] = 0x0078, // Latin Small Letter X
402 [0x79] = 0x0079, // Latin Small Letter Y
403 [0x7a] = 0x007a, // Latin Small Letter Z
404 [0x7b] = 0x007b, // Left Curly Bracket
405 [0x7c] = 0x007c, // Vertical Line
406 [0x7d] = 0x007d, // Right Curly Bracket
407 [0x7e] = 0x007e, // Tilde
408 [0x7f] = 0x007f, // Delete
// 0x80-0x9f: the only range that is not an identity mapping.
409 [0x80] = 0x20ac, // Euro Sign
410 [0x81] = 0x0081, // ?? (no character name in the source mapping; byte maps to itself)
411 [0x82] = 0x201a, // Single Low-9 Quotation Mark
412 [0x83] = 0x0192, // Latin Small Letter F With Hook
413 [0x84] = 0x201e, // Double Low-9 Quotation Mark
414 [0x85] = 0x2026, // Horizontal Ellipsis
415 [0x86] = 0x2020, // Dagger
416 [0x87] = 0x2021, // Double Dagger
417 [0x88] = 0x02c6, // Modifier Letter Circumflex Accent
418 [0x89] = 0x2030, // Per Mille Sign
419 [0x8a] = 0x0160, // Latin Capital Letter S With Caron
420 [0x8b] = 0x2039, // Single Left-Pointing Angle Quotation Mark
421 [0x8c] = 0x0152, // Latin Capital Ligature Oe
422 [0x8d] = 0x008d, // ?? (no character name in the source mapping; byte maps to itself)
423 [0x8e] = 0x017d, // Latin Capital Letter Z With Caron
424 [0x8f] = 0x008f, // ?? (no character name in the source mapping; byte maps to itself)
425 [0x90] = 0x0090, // ?? (no character name in the source mapping; byte maps to itself)
426 [0x91] = 0x2018, // Left Single Quotation Mark
427 [0x92] = 0x2019, // Right Single Quotation Mark
428 [0x93] = 0x201c, // Left Double Quotation Mark
429 [0x94] = 0x201d, // Right Double Quotation Mark
430 [0x95] = 0x2022, // Bullet
431 [0x96] = 0x2013, // En Dash
432 [0x97] = 0x2014, // Em Dash
433 [0x98] = 0x02dc, // Small Tilde
434 [0x99] = 0x2122, // Trade Mark Sign
435 [0x9a] = 0x0161, // Latin Small Letter S With Caron
436 [0x9b] = 0x203a, // Single Right-Pointing Angle Quotation Mark
437 [0x9c] = 0x0153, // Latin Small Ligature Oe
438 [0x9d] = 0x009d, // ?? (no character name in the source mapping; byte maps to itself)
439 [0x9e] = 0x017e, // Latin Small Letter Z With Caron
440 [0x9f] = 0x0178, // Latin Capital Letter Y With Diaeresis
// 0xa0-0xff: each byte maps to the code point with the same value.
441 [0xa0] = 0x00a0, // No-Break Space
442 [0xa1] = 0x00a1, // Inverted Exclamation Mark
443 [0xa2] = 0x00a2, // Cent Sign
444 [0xa3] = 0x00a3, // Pound Sign
445 [0xa4] = 0x00a4, // Currency Sign
446 [0xa5] = 0x00a5, // Yen Sign
447 [0xa6] = 0x00a6, // Broken Bar
448 [0xa7] = 0x00a7, // Section Sign
449 [0xa8] = 0x00a8, // Diaeresis
450 [0xa9] = 0x00a9, // Copyright Sign
451 [0xaa] = 0x00aa, // Feminine Ordinal Indicator
452 [0xab] = 0x00ab, // Left-Pointing Double Angle Quotation Mark
453 [0xac] = 0x00ac, // Not Sign
454 [0xad] = 0x00ad, // Soft Hyphen
455 [0xae] = 0x00ae, // Registered Sign
456 [0xaf] = 0x00af, // Macron
457 [0xb0] = 0x00b0, // Degree Sign
458 [0xb1] = 0x00b1, // Plus-Minus Sign
459 [0xb2] = 0x00b2, // Superscript Two
460 [0xb3] = 0x00b3, // Superscript Three
461 [0xb4] = 0x00b4, // Acute Accent
462 [0xb5] = 0x00b5, // Micro Sign
463 [0xb6] = 0x00b6, // Pilcrow Sign
464 [0xb7] = 0x00b7, // Middle Dot
465 [0xb8] = 0x00b8, // Cedilla
466 [0xb9] = 0x00b9, // Superscript One
467 [0xba] = 0x00ba, // Masculine Ordinal Indicator
468 [0xbb] = 0x00bb, // Right-Pointing Double Angle Quotation Mark
469 [0xbc] = 0x00bc, // Vulgar Fraction One Quarter
470 [0xbd] = 0x00bd, // Vulgar Fraction One Half
471 [0xbe] = 0x00be, // Vulgar Fraction Three Quarters
472 [0xbf] = 0x00bf, // Inverted Question Mark
473 [0xc0] = 0x00c0, // Latin Capital Letter A With Grave
474 [0xc1] = 0x00c1, // Latin Capital Letter A With Acute
475 [0xc2] = 0x00c2, // Latin Capital Letter A With Circumflex
476 [0xc3] = 0x00c3, // Latin Capital Letter A With Tilde
477 [0xc4] = 0x00c4, // Latin Capital Letter A With Diaeresis
478 [0xc5] = 0x00c5, // Latin Capital Letter A With Ring Above
479 [0xc6] = 0x00c6, // Latin Capital Ligature Ae
480 [0xc7] = 0x00c7, // Latin Capital Letter C With Cedilla
481 [0xc8] = 0x00c8, // Latin Capital Letter E With Grave
482 [0xc9] = 0x00c9, // Latin Capital Letter E With Acute
483 [0xca] = 0x00ca, // Latin Capital Letter E With Circumflex
484 [0xcb] = 0x00cb, // Latin Capital Letter E With Diaeresis
485 [0xcc] = 0x00cc, // Latin Capital Letter I With Grave
486 [0xcd] = 0x00cd, // Latin Capital Letter I With Acute
487 [0xce] = 0x00ce, // Latin Capital Letter I With Circumflex
488 [0xcf] = 0x00cf, // Latin Capital Letter I With Diaeresis
489 [0xd0] = 0x00d0, // Latin Capital Letter Eth
490 [0xd1] = 0x00d1, // Latin Capital Letter N With Tilde
491 [0xd2] = 0x00d2, // Latin Capital Letter O With Grave
492 [0xd3] = 0x00d3, // Latin Capital Letter O With Acute
493 [0xd4] = 0x00d4, // Latin Capital Letter O With Circumflex
494 [0xd5] = 0x00d5, // Latin Capital Letter O With Tilde
495 [0xd6] = 0x00d6, // Latin Capital Letter O With Diaeresis
496 [0xd7] = 0x00d7, // Multiplication Sign
497 [0xd8] = 0x00d8, // Latin Capital Letter O With Stroke
498 [0xd9] = 0x00d9, // Latin Capital Letter U With Grave
499 [0xda] = 0x00da, // Latin Capital Letter U With Acute
500 [0xdb] = 0x00db, // Latin Capital Letter U With Circumflex
501 [0xdc] = 0x00dc, // Latin Capital Letter U With Diaeresis
502 [0xdd] = 0x00dd, // Latin Capital Letter Y With Acute
503 [0xde] = 0x00de, // Latin Capital Letter Thorn
504 [0xdf] = 0x00df, // Latin Small Letter Sharp S
505 [0xe0] = 0x00e0, // Latin Small Letter A With Grave
506 [0xe1] = 0x00e1, // Latin Small Letter A With Acute
507 [0xe2] = 0x00e2, // Latin Small Letter A With Circumflex
508 [0xe3] = 0x00e3, // Latin Small Letter A With Tilde
509 [0xe4] = 0x00e4, // Latin Small Letter A With Diaeresis
510 [0xe5] = 0x00e5, // Latin Small Letter A With Ring Above
511 [0xe6] = 0x00e6, // Latin Small Ligature Ae
512 [0xe7] = 0x00e7, // Latin Small Letter C With Cedilla
513 [0xe8] = 0x00e8, // Latin Small Letter E With Grave
514 [0xe9] = 0x00e9, // Latin Small Letter E With Acute
515 [0xea] = 0x00ea, // Latin Small Letter E With Circumflex
516 [0xeb] = 0x00eb, // Latin Small Letter E With Diaeresis
517 [0xec] = 0x00ec, // Latin Small Letter I With Grave
518 [0xed] = 0x00ed, // Latin Small Letter I With Acute
519 [0xee] = 0x00ee, // Latin Small Letter I With Circumflex
520 [0xef] = 0x00ef, // Latin Small Letter I With Diaeresis
521 [0xf0] = 0x00f0, // Latin Small Letter Eth
522 [0xf1] = 0x00f1, // Latin Small Letter N With Tilde
523 [0xf2] = 0x00f2, // Latin Small Letter O With Grave
524 [0xf3] = 0x00f3, // Latin Small Letter O With Acute
525 [0xf4] = 0x00f4, // Latin Small Letter O With Circumflex
526 [0xf5] = 0x00f5, // Latin Small Letter O With Tilde
527 [0xf6] = 0x00f6, // Latin Small Letter O With Diaeresis
528 [0xf7] = 0x00f7, // Division Sign
529 [0xf8] = 0x00f8, // Latin Small Letter O With Stroke
530 [0xf9] = 0x00f9, // Latin Small Letter U With Grave
531 [0xfa] = 0x00fa, // Latin Small Letter U With Acute
532 [0xfb] = 0x00fb, // Latin Small Letter U With Circumflex
533 [0xfc] = 0x00fc, // Latin Small Letter U With Diaeresis
534 [0xfd] = 0x00fd, // Latin Small Letter Y With Acute
535 [0xfe] = 0x00fe, // Latin Small Letter Thorn
536 [0xff] = 0x00ff, // Latin Small Letter Y With Diaeresis
539 static const wchar_t ISO_8859_1_UNICODE_TABLE[] = {
540 [0x00] = 0x0000, // NULL
541 [0x01] = 0x0001, // START OF HEADING
542 [0x02] = 0x0002, // START OF TEXT
543 [0x03] = 0x0003, // END OF TEXT
544 [0x04] = 0x0004, // END OF TRANSMISSION
545 [0x05] = 0x0005, // ENQUIRY
546 [0x06] = 0x0006, // ACKNOWLEDGE
547 [0x07] = 0x0007, // BELL
548 [0x08] = 0x0008, // BACKSPACE
549 [0x09] = 0x0009, // HORIZONTAL TABULATION
550 [0x0A] = 0x000A, // LINE FEED
551 [0x0B] = 0x000B, // VERTICAL TABULATION
552 [0x0C] = 0x000C, // FORM FEED
553 [0x0D] = 0x000D, // CARRIAGE RETURN
554 [0x0E] = 0x000E, // SHIFT OUT
555 [0x0F] = 0x000F, // SHIFT IN
556 [0x10] = 0x0010, // DATA LINK ESCAPE
557 [0x11] = 0x0011, // DEVICE CONTROL ONE
558 [0x12] = 0x0012, // DEVICE CONTROL TWO
559 [0x13] = 0x0013, // DEVICE CONTROL THREE
560 [0x14] = 0x0014, // DEVICE CONTROL FOUR
561 [0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
562 [0x16] = 0x0016, // SYNCHRONOUS IDLE
563 [0x17] = 0x0017, // END OF TRANSMISSION BLOCK
564 [0x18] = 0x0018, // CANCEL
565 [0x19] = 0x0019, // END OF MEDIUM
566 [0x1A] = 0x001A, // SUBSTITUTE
567 [0x1B] = 0x001B, // ESCAPE
568 [0x1C] = 0x001C, // FILE SEPARATOR
569 [0x1D] = 0x001D, // GROUP SEPARATOR
570 [0x1E] = 0x001E, // RECORD SEPARATOR
571 [0x1F] = 0x001F, // UNIT SEPARATOR
572 [0x20] = 0x0020, // SPACE
573 [0x21] = 0x0021, // EXCLAMATION MARK
574 [0x22] = 0x0022, // QUOTATION MARK
575 [0x23] = 0x0023, // NUMBER SIGN
576 [0x24] = 0x0024, // DOLLAR SIGN
577 [0x25] = 0x0025, // PERCENT SIGN
578 [0x26] = 0x0026, // AMPERSAND
579 [0x27] = 0x0027, // APOSTROPHE
580 [0x28] = 0x0028, // LEFT PARENTHESIS
581 [0x29] = 0x0029, // RIGHT PARENTHESIS
582 [0x2A] = 0x002A, // ASTERISK
583 [0x2B] = 0x002B, // PLUS SIGN
584 [0x2C] = 0x002C, // COMMA
585 [0x2D] = 0x002D, // HYPHEN-MINUS
586 [0x2E] = 0x002E, // FULL STOP
587 [0x2F] = 0x002F, // SOLIDUS
588 [0x30] = 0x0030, // DIGIT ZERO
589 [0x31] = 0x0031, // DIGIT ONE
590 [0x32] = 0x0032, // DIGIT TWO
591 [0x33] = 0x0033, // DIGIT THREE
592 [0x34] = 0x0034, // DIGIT FOUR
593 [0x35] = 0x0035, // DIGIT FIVE
594 [0x36] = 0x0036, // DIGIT SIX
595 [0x37] = 0x0037, // DIGIT SEVEN
596 [0x38] = 0x0038, // DIGIT EIGHT
597 [0x39] = 0x0039, // DIGIT NINE
598 [0x3A] = 0x003A, // COLON
599 [0x3B] = 0x003B, // SEMICOLON
600 [0x3C] = 0x003C, // LESS-THAN SIGN
601 [0x3D] = 0x003D, // EQUALS SIGN
602 [0x3E] = 0x003E, // GREATER-THAN SIGN
603 [0x3F] = 0x003F, // QUESTION MARK
604 [0x40] = 0x0040, // COMMERCIAL AT
605 [0x41] = 0x0041, // LATIN CAPITAL LETTER A
606 [0x42] = 0x0042, // LATIN CAPITAL LETTER B
607 [0x43] = 0x0043, // LATIN CAPITAL LETTER C
608 [0x44] = 0x0044, // LATIN CAPITAL LETTER D
609 [0x45] = 0x0045, // LATIN CAPITAL LETTER E
610 [0x46] = 0x0046, // LATIN CAPITAL LETTER F
611 [0x47] = 0x0047, // LATIN CAPITAL LETTER G
612 [0x48] = 0x0048, // LATIN CAPITAL LETTER H
613 [0x49] = 0x0049, // LATIN CAPITAL LETTER I
614 [0x4A] = 0x004A, // LATIN CAPITAL LETTER J
615 [0x4B] = 0x004B, // LATIN CAPITAL LETTER K
616 [0x4C] = 0x004C, // LATIN CAPITAL LETTER L
617 [0x4D] = 0x004D, // LATIN CAPITAL LETTER M
618 [0x4E] = 0x004E, // LATIN CAPITAL LETTER N
619 [0x4F] = 0x004F, // LATIN CAPITAL LETTER O
620 [0x50] = 0x0050, // LATIN CAPITAL LETTER P
621 [0x51] = 0x0051, // LATIN CAPITAL LETTER Q
622 [0x52] = 0x0052, // LATIN CAPITAL LETTER R
623 [0x53] = 0x0053, // LATIN CAPITAL LETTER S
624 [0x54] = 0x0054, // LATIN CAPITAL LETTER T
625 [0x55] = 0x0055, // LATIN CAPITAL LETTER U
626 [0x56] = 0x0056, // LATIN CAPITAL LETTER V
627 [0x57] = 0x0057, // LATIN CAPITAL LETTER W
628 [0x58] = 0x0058, // LATIN CAPITAL LETTER X
629 [0x59] = 0x0059, // LATIN CAPITAL LETTER Y
630 [0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
631 [0x5B] = 0x005B, // LEFT SQUARE BRACKET
632 [0x5C] = 0x005C, // REVERSE SOLIDUS
633 [0x5D] = 0x005D, // RIGHT SQUARE BRACKET
634 [0x5E] = 0x005E, // CIRCUMFLEX ACCENT
635 [0x5F] = 0x005F, // LOW LINE
636 [0x60] = 0x0060, // GRAVE ACCENT
637 [0x61] = 0x0061, // LATIN SMALL LETTER A
638 [0x62] = 0x0062, // LATIN SMALL LETTER B
639 [0x63] = 0x0063, // LATIN SMALL LETTER C
640 [0x64] = 0x0064, // LATIN SMALL LETTER D
641 [0x65] = 0x0065, // LATIN SMALL LETTER E
642 [0x66] = 0x0066, // LATIN SMALL LETTER F
643 [0x67] = 0x0067, // LATIN SMALL LETTER G
644 [0x68] = 0x0068, // LATIN SMALL LETTER H
645 [0x69] = 0x0069, // LATIN SMALL LETTER I
646 [0x6A] = 0x006A, // LATIN SMALL LETTER J
647 [0x6B] = 0x006B, // LATIN SMALL LETTER K
648 [0x6C] = 0x006C, // LATIN SMALL LETTER L
649 [0x6D] = 0x006D, // LATIN SMALL LETTER M
650 [0x6E] = 0x006E, // LATIN SMALL LETTER N
651 [0x6F] = 0x006F, // LATIN SMALL LETTER O
652 [0x70] = 0x0070, // LATIN SMALL LETTER P
653 [0x71] = 0x0071, // LATIN SMALL LETTER Q
654 [0x72] = 0x0072, // LATIN SMALL LETTER R
655 [0x73] = 0x0073, // LATIN SMALL LETTER S
656 [0x74] = 0x0074, // LATIN SMALL LETTER T
657 [0x75] = 0x0075, // LATIN SMALL LETTER U
658 [0x76] = 0x0076, // LATIN SMALL LETTER V
659 [0x77] = 0x0077, // LATIN SMALL LETTER W
660 [0x78] = 0x0078, // LATIN SMALL LETTER X
661 [0x79] = 0x0079, // LATIN SMALL LETTER Y
662 [0x7A] = 0x007A, // LATIN SMALL LETTER Z
663 [0x7B] = 0x007B, // LEFT CURLY BRACKET
664 [0x7C] = 0x007C, // VERTICAL LINE
665 [0x7D] = 0x007D, // RIGHT CURLY BRACKET
666 [0x7E] = 0x007E, // TILDE
667 [0x7F] = 0x007F, // DELETE
668 [0x80] = 0x0080, // <control>
669 [0x81] = 0x0081, // <control>
670 [0x82] = 0x0082, // <control>
671 [0x83] = 0x0083, // <control>
672 [0x84] = 0x0084, // <control>
673 [0x85] = 0x0085, // <control>
674 [0x86] = 0x0086, // <control>
675 [0x87] = 0x0087, // <control>
676 [0x88] = 0x0088, // <control>
677 [0x89] = 0x0089, // <control>
678 [0x8A] = 0x008A, // <control>
679 [0x8B] = 0x008B, // <control>
680 [0x8C] = 0x008C, // <control>
681 [0x8D] = 0x008D, // <control>
682 [0x8E] = 0x008E, // <control>
683 [0x8F] = 0x008F, // <control>
684 [0x90] = 0x0090, // <control>
685 [0x91] = 0x0091, // <control>
686 [0x92] = 0x0092, // <control>
687 [0x93] = 0x0093, // <control>
688 [0x94] = 0x0094, // <control>
689 [0x95] = 0x0095, // <control>
690 [0x96] = 0x0096, // <control>
691 [0x97] = 0x0097, // <control>
692 [0x98] = 0x0098, // <control>
693 [0x99] = 0x0099, // <control>
694 [0x9A] = 0x009A, // <control>
695 [0x9B] = 0x009B, // <control>
696 [0x9C] = 0x009C, // <control>
697 [0x9D] = 0x009D, // <control>
698 [0x9E] = 0x009E, // <control>
699 [0x9F] = 0x009F, // <control>
700 [0xA0] = 0x00A0, // NO-BREAK SPACE
701 [0xA1] = 0x00A1, // INVERTED EXCLAMATION MARK
702 [0xA2] = 0x00A2, // CENT SIGN
703 [0xA3] = 0x00A3, // POUND SIGN
704 [0xA4] = 0x00A4, // CURRENCY SIGN
705 [0xA5] = 0x00A5, // YEN SIGN
706 [0xA6] = 0x00A6, // BROKEN BAR
707 [0xA7] = 0x00A7, // SECTION SIGN
708 [0xA8] = 0x00A8, // DIAERESIS
709 [0xA9] = 0x00A9, // COPYRIGHT SIGN
710 [0xAA] = 0x00AA, // FEMININE ORDINAL INDICATOR
711 [0xAB] = 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
712 [0xAC] = 0x00AC, // NOT SIGN
713 [0xAD] = 0x00AD, // SOFT HYPHEN
714 [0xAE] = 0x00AE, // REGISTERED SIGN
715 [0xAF] = 0x00AF, // MACRON
716 [0xB0] = 0x00B0, // DEGREE SIGN
717 [0xB1] = 0x00B1, // PLUS-MINUS SIGN
718 [0xB2] = 0x00B2, // SUPERSCRIPT TWO
719 [0xB3] = 0x00B3, // SUPERSCRIPT THREE
720 [0xB4] = 0x00B4, // ACUTE ACCENT
721 [0xB5] = 0x00B5, // MICRO SIGN
722 [0xB6] = 0x00B6, // PILCROW SIGN
723 [0xB7] = 0x00B7, // MIDDLE DOT
724 [0xB8] = 0x00B8, // CEDILLA
725 [0xB9] = 0x00B9, // SUPERSCRIPT ONE
726 [0xBA] = 0x00BA, // MASCULINE ORDINAL INDICATOR
727 [0xBB] = 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
728 [0xBC] = 0x00BC, // VULGAR FRACTION ONE QUARTER
729 [0xBD] = 0x00BD, // VULGAR FRACTION ONE HALF
730 [0xBE] = 0x00BE, // VULGAR FRACTION THREE QUARTERS
731 [0xBF] = 0x00BF, // INVERTED QUESTION MARK
732 [0xC0] = 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
733 [0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
734 [0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
735 [0xC3] = 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
736 [0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
737 [0xC5] = 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
738 [0xC6] = 0x00C6, // LATIN CAPITAL LETTER AE
739 [0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
740 [0xC8] = 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
741 [0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
742 [0xCA] = 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
743 [0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
744 [0xCC] = 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
745 [0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
746 [0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
747 [0xCF] = 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
748 [0xD0] = 0x00D0, // LATIN CAPITAL LETTER ETH (Icelandic)
749 [0xD1] = 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
750 [0xD2] = 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
751 [0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
752 [0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
753 [0xD5] = 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
754 [0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
755 [0xD7] = 0x00D7, // MULTIPLICATION SIGN
756 [0xD8] = 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
757 [0xD9] = 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
758 [0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
759 [0xDB] = 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
760 [0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
761 [0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
762 [0xDE] = 0x00DE, // LATIN CAPITAL LETTER THORN (Icelandic)
763 [0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S (German)
764 [0xE0] = 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
765 [0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
766 [0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
767 [0xE3] = 0x00E3, // LATIN SMALL LETTER A WITH TILDE
768 [0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
769 [0xE5] = 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
770 [0xE6] = 0x00E6, // LATIN SMALL LETTER AE
771 [0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
772 [0xE8] = 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
773 [0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
774 [0xEA] = 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
775 [0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
776 [0xEC] = 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
777 [0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
778 [0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
779 [0xEF] = 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
780 [0xF0] = 0x00F0, // LATIN SMALL LETTER ETH (Icelandic)
781 [0xF1] = 0x00F1, // LATIN SMALL LETTER N WITH TILDE
782 [0xF2] = 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
783 [0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
784 [0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
785 [0xF5] = 0x00F5, // LATIN SMALL LETTER O WITH TILDE
786 [0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
787 [0xF7] = 0x00F7, // DIVISION SIGN
788 [0xF8] = 0x00F8, // LATIN SMALL LETTER O WITH STROKE
789 [0xF9] = 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
790 [0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
791 [0xFB] = 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
792 [0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
793 [0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
794 [0xFE] = 0x00FE, // LATIN SMALL LETTER THORN (Icelandic)
795 [0xFF] = 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
/*
 * ISO-8859-2 to Unicode: the array index is the 8-bit character value
 * and the element is the Unicode code point it stands for.  Entries
 * 0x00-0xA0 map to the same-numbered code point; from 0xA1 upward many
 * entries are remapped to code points above 0xFF (e.g. 0xA1 -> 0x0104).
 * Per the source list at the top of this file, the data was transformed
 * from https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
 * Used by iso8859_2_to_utf8 and iso8859_2_attach.
 */
798 static const wchar_t ISO_8859_2_UNICODE_TABLE[] = {
/* 0x00-0x7F: same-numbered code points */
799 [0x00] = 0x0000, // NULL
800 [0x01] = 0x0001, // START OF HEADING
801 [0x02] = 0x0002, // START OF TEXT
802 [0x03] = 0x0003, // END OF TEXT
803 [0x04] = 0x0004, // END OF TRANSMISSION
804 [0x05] = 0x0005, // ENQUIRY
805 [0x06] = 0x0006, // ACKNOWLEDGE
806 [0x07] = 0x0007, // BELL
807 [0x08] = 0x0008, // BACKSPACE
808 [0x09] = 0x0009, // HORIZONTAL TABULATION
809 [0x0A] = 0x000A, // LINE FEED
810 [0x0B] = 0x000B, // VERTICAL TABULATION
811 [0x0C] = 0x000C, // FORM FEED
812 [0x0D] = 0x000D, // CARRIAGE RETURN
813 [0x0E] = 0x000E, // SHIFT OUT
814 [0x0F] = 0x000F, // SHIFT IN
815 [0x10] = 0x0010, // DATA LINK ESCAPE
816 [0x11] = 0x0011, // DEVICE CONTROL ONE
817 [0x12] = 0x0012, // DEVICE CONTROL TWO
818 [0x13] = 0x0013, // DEVICE CONTROL THREE
819 [0x14] = 0x0014, // DEVICE CONTROL FOUR
820 [0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
821 [0x16] = 0x0016, // SYNCHRONOUS IDLE
822 [0x17] = 0x0017, // END OF TRANSMISSION BLOCK
823 [0x18] = 0x0018, // CANCEL
824 [0x19] = 0x0019, // END OF MEDIUM
825 [0x1A] = 0x001A, // SUBSTITUTE
826 [0x1B] = 0x001B, // ESCAPE
827 [0x1C] = 0x001C, // FILE SEPARATOR
828 [0x1D] = 0x001D, // GROUP SEPARATOR
829 [0x1E] = 0x001E, // RECORD SEPARATOR
830 [0x1F] = 0x001F, // UNIT SEPARATOR
831 [0x20] = 0x0020, // SPACE
832 [0x21] = 0x0021, // EXCLAMATION MARK
833 [0x22] = 0x0022, // QUOTATION MARK
834 [0x23] = 0x0023, // NUMBER SIGN
835 [0x24] = 0x0024, // DOLLAR SIGN
836 [0x25] = 0x0025, // PERCENT SIGN
837 [0x26] = 0x0026, // AMPERSAND
838 [0x27] = 0x0027, // APOSTROPHE
839 [0x28] = 0x0028, // LEFT PARENTHESIS
840 [0x29] = 0x0029, // RIGHT PARENTHESIS
841 [0x2A] = 0x002A, // ASTERISK
842 [0x2B] = 0x002B, // PLUS SIGN
843 [0x2C] = 0x002C, // COMMA
844 [0x2D] = 0x002D, // HYPHEN-MINUS
845 [0x2E] = 0x002E, // FULL STOP
846 [0x2F] = 0x002F, // SOLIDUS
847 [0x30] = 0x0030, // DIGIT ZERO
848 [0x31] = 0x0031, // DIGIT ONE
849 [0x32] = 0x0032, // DIGIT TWO
850 [0x33] = 0x0033, // DIGIT THREE
851 [0x34] = 0x0034, // DIGIT FOUR
852 [0x35] = 0x0035, // DIGIT FIVE
853 [0x36] = 0x0036, // DIGIT SIX
854 [0x37] = 0x0037, // DIGIT SEVEN
855 [0x38] = 0x0038, // DIGIT EIGHT
856 [0x39] = 0x0039, // DIGIT NINE
857 [0x3A] = 0x003A, // COLON
858 [0x3B] = 0x003B, // SEMICOLON
859 [0x3C] = 0x003C, // LESS-THAN SIGN
860 [0x3D] = 0x003D, // EQUALS SIGN
861 [0x3E] = 0x003E, // GREATER-THAN SIGN
862 [0x3F] = 0x003F, // QUESTION MARK
863 [0x40] = 0x0040, // COMMERCIAL AT
864 [0x41] = 0x0041, // LATIN CAPITAL LETTER A
865 [0x42] = 0x0042, // LATIN CAPITAL LETTER B
866 [0x43] = 0x0043, // LATIN CAPITAL LETTER C
867 [0x44] = 0x0044, // LATIN CAPITAL LETTER D
868 [0x45] = 0x0045, // LATIN CAPITAL LETTER E
869 [0x46] = 0x0046, // LATIN CAPITAL LETTER F
870 [0x47] = 0x0047, // LATIN CAPITAL LETTER G
871 [0x48] = 0x0048, // LATIN CAPITAL LETTER H
872 [0x49] = 0x0049, // LATIN CAPITAL LETTER I
873 [0x4A] = 0x004A, // LATIN CAPITAL LETTER J
874 [0x4B] = 0x004B, // LATIN CAPITAL LETTER K
875 [0x4C] = 0x004C, // LATIN CAPITAL LETTER L
876 [0x4D] = 0x004D, // LATIN CAPITAL LETTER M
877 [0x4E] = 0x004E, // LATIN CAPITAL LETTER N
878 [0x4F] = 0x004F, // LATIN CAPITAL LETTER O
879 [0x50] = 0x0050, // LATIN CAPITAL LETTER P
880 [0x51] = 0x0051, // LATIN CAPITAL LETTER Q
881 [0x52] = 0x0052, // LATIN CAPITAL LETTER R
882 [0x53] = 0x0053, // LATIN CAPITAL LETTER S
883 [0x54] = 0x0054, // LATIN CAPITAL LETTER T
884 [0x55] = 0x0055, // LATIN CAPITAL LETTER U
885 [0x56] = 0x0056, // LATIN CAPITAL LETTER V
886 [0x57] = 0x0057, // LATIN CAPITAL LETTER W
887 [0x58] = 0x0058, // LATIN CAPITAL LETTER X
888 [0x59] = 0x0059, // LATIN CAPITAL LETTER Y
889 [0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
890 [0x5B] = 0x005B, // LEFT SQUARE BRACKET
891 [0x5C] = 0x005C, // REVERSE SOLIDUS
892 [0x5D] = 0x005D, // RIGHT SQUARE BRACKET
893 [0x5E] = 0x005E, // CIRCUMFLEX ACCENT
894 [0x5F] = 0x005F, // LOW LINE
895 [0x60] = 0x0060, // GRAVE ACCENT
896 [0x61] = 0x0061, // LATIN SMALL LETTER A
897 [0x62] = 0x0062, // LATIN SMALL LETTER B
898 [0x63] = 0x0063, // LATIN SMALL LETTER C
899 [0x64] = 0x0064, // LATIN SMALL LETTER D
900 [0x65] = 0x0065, // LATIN SMALL LETTER E
901 [0x66] = 0x0066, // LATIN SMALL LETTER F
902 [0x67] = 0x0067, // LATIN SMALL LETTER G
903 [0x68] = 0x0068, // LATIN SMALL LETTER H
904 [0x69] = 0x0069, // LATIN SMALL LETTER I
905 [0x6A] = 0x006A, // LATIN SMALL LETTER J
906 [0x6B] = 0x006B, // LATIN SMALL LETTER K
907 [0x6C] = 0x006C, // LATIN SMALL LETTER L
908 [0x6D] = 0x006D, // LATIN SMALL LETTER M
909 [0x6E] = 0x006E, // LATIN SMALL LETTER N
910 [0x6F] = 0x006F, // LATIN SMALL LETTER O
911 [0x70] = 0x0070, // LATIN SMALL LETTER P
912 [0x71] = 0x0071, // LATIN SMALL LETTER Q
913 [0x72] = 0x0072, // LATIN SMALL LETTER R
914 [0x73] = 0x0073, // LATIN SMALL LETTER S
915 [0x74] = 0x0074, // LATIN SMALL LETTER T
916 [0x75] = 0x0075, // LATIN SMALL LETTER U
917 [0x76] = 0x0076, // LATIN SMALL LETTER V
918 [0x77] = 0x0077, // LATIN SMALL LETTER W
919 [0x78] = 0x0078, // LATIN SMALL LETTER X
920 [0x79] = 0x0079, // LATIN SMALL LETTER Y
921 [0x7A] = 0x007A, // LATIN SMALL LETTER Z
922 [0x7B] = 0x007B, // LEFT CURLY BRACKET
923 [0x7C] = 0x007C, // VERTICAL LINE
924 [0x7D] = 0x007D, // RIGHT CURLY BRACKET
925 [0x7E] = 0x007E, // TILDE
926 [0x7F] = 0x007F, // DELETE
/* 0x80-0x9F: <control> entries, same-numbered code points */
927 [0x80] = 0x0080, // <control>
928 [0x81] = 0x0081, // <control>
929 [0x82] = 0x0082, // <control>
930 [0x83] = 0x0083, // <control>
931 [0x84] = 0x0084, // <control>
932 [0x85] = 0x0085, // <control>
933 [0x86] = 0x0086, // <control>
934 [0x87] = 0x0087, // <control>
935 [0x88] = 0x0088, // <control>
936 [0x89] = 0x0089, // <control>
937 [0x8A] = 0x008A, // <control>
938 [0x8B] = 0x008B, // <control>
939 [0x8C] = 0x008C, // <control>
940 [0x8D] = 0x008D, // <control>
941 [0x8E] = 0x008E, // <control>
942 [0x8F] = 0x008F, // <control>
943 [0x90] = 0x0090, // <control>
944 [0x91] = 0x0091, // <control>
945 [0x92] = 0x0092, // <control>
946 [0x93] = 0x0093, // <control>
947 [0x94] = 0x0094, // <control>
948 [0x95] = 0x0095, // <control>
949 [0x96] = 0x0096, // <control>
950 [0x97] = 0x0097, // <control>
951 [0x98] = 0x0098, // <control>
952 [0x99] = 0x0099, // <control>
953 [0x9A] = 0x009A, // <control>
954 [0x9B] = 0x009B, // <control>
955 [0x9C] = 0x009C, // <control>
956 [0x9D] = 0x009D, // <control>
957 [0x9E] = 0x009E, // <control>
958 [0x9F] = 0x009F, // <control>
/* 0xA0-0xFF: the ISO-8859-2 specific assignments */
959 [0xA0] = 0x00A0, // NO-BREAK SPACE
960 [0xA1] = 0x0104, // LATIN CAPITAL LETTER A WITH OGONEK
961 [0xA2] = 0x02D8, // BREVE
962 [0xA3] = 0x0141, // LATIN CAPITAL LETTER L WITH STROKE
963 [0xA4] = 0x00A4, // CURRENCY SIGN
964 [0xA5] = 0x013D, // LATIN CAPITAL LETTER L WITH CARON
965 [0xA6] = 0x015A, // LATIN CAPITAL LETTER S WITH ACUTE
966 [0xA7] = 0x00A7, // SECTION SIGN
967 [0xA8] = 0x00A8, // DIAERESIS
968 [0xA9] = 0x0160, // LATIN CAPITAL LETTER S WITH CARON
969 [0xAA] = 0x015E, // LATIN CAPITAL LETTER S WITH CEDILLA
970 [0xAB] = 0x0164, // LATIN CAPITAL LETTER T WITH CARON
971 [0xAC] = 0x0179, // LATIN CAPITAL LETTER Z WITH ACUTE
972 [0xAD] = 0x00AD, // SOFT HYPHEN
973 [0xAE] = 0x017D, // LATIN CAPITAL LETTER Z WITH CARON
974 [0xAF] = 0x017B, // LATIN CAPITAL LETTER Z WITH DOT ABOVE
975 [0xB0] = 0x00B0, // DEGREE SIGN
976 [0xB1] = 0x0105, // LATIN SMALL LETTER A WITH OGONEK
977 [0xB2] = 0x02DB, // OGONEK
978 [0xB3] = 0x0142, // LATIN SMALL LETTER L WITH STROKE
979 [0xB4] = 0x00B4, // ACUTE ACCENT
980 [0xB5] = 0x013E, // LATIN SMALL LETTER L WITH CARON
981 [0xB6] = 0x015B, // LATIN SMALL LETTER S WITH ACUTE
982 [0xB7] = 0x02C7, // CARON
983 [0xB8] = 0x00B8, // CEDILLA
984 [0xB9] = 0x0161, // LATIN SMALL LETTER S WITH CARON
985 [0xBA] = 0x015F, // LATIN SMALL LETTER S WITH CEDILLA
986 [0xBB] = 0x0165, // LATIN SMALL LETTER T WITH CARON
987 [0xBC] = 0x017A, // LATIN SMALL LETTER Z WITH ACUTE
988 [0xBD] = 0x02DD, // DOUBLE ACUTE ACCENT
989 [0xBE] = 0x017E, // LATIN SMALL LETTER Z WITH CARON
990 [0xBF] = 0x017C, // LATIN SMALL LETTER Z WITH DOT ABOVE
991 [0xC0] = 0x0154, // LATIN CAPITAL LETTER R WITH ACUTE
992 [0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
993 [0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
994 [0xC3] = 0x0102, // LATIN CAPITAL LETTER A WITH BREVE
995 [0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
996 [0xC5] = 0x0139, // LATIN CAPITAL LETTER L WITH ACUTE
997 [0xC6] = 0x0106, // LATIN CAPITAL LETTER C WITH ACUTE
998 [0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
999 [0xC8] = 0x010C, // LATIN CAPITAL LETTER C WITH CARON
1000 [0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
1001 [0xCA] = 0x0118, // LATIN CAPITAL LETTER E WITH OGONEK
1002 [0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
1003 [0xCC] = 0x011A, // LATIN CAPITAL LETTER E WITH CARON
1004 [0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
1005 [0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
1006 [0xCF] = 0x010E, // LATIN CAPITAL LETTER D WITH CARON
1007 [0xD0] = 0x0110, // LATIN CAPITAL LETTER D WITH STROKE
1008 [0xD1] = 0x0143, // LATIN CAPITAL LETTER N WITH ACUTE
1009 [0xD2] = 0x0147, // LATIN CAPITAL LETTER N WITH CARON
1010 [0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
1011 [0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
1012 [0xD5] = 0x0150, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
1013 [0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
1014 [0xD7] = 0x00D7, // MULTIPLICATION SIGN
1015 [0xD8] = 0x0158, // LATIN CAPITAL LETTER R WITH CARON
1016 [0xD9] = 0x016E, // LATIN CAPITAL LETTER U WITH RING ABOVE
1017 [0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
1018 [0xDB] = 0x0170, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
1019 [0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
1020 [0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
1021 [0xDE] = 0x0162, // LATIN CAPITAL LETTER T WITH CEDILLA
1022 [0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S
1023 [0xE0] = 0x0155, // LATIN SMALL LETTER R WITH ACUTE
1024 [0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
1025 [0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
1026 [0xE3] = 0x0103, // LATIN SMALL LETTER A WITH BREVE
1027 [0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
1028 [0xE5] = 0x013A, // LATIN SMALL LETTER L WITH ACUTE
1029 [0xE6] = 0x0107, // LATIN SMALL LETTER C WITH ACUTE
1030 [0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
1031 [0xE8] = 0x010D, // LATIN SMALL LETTER C WITH CARON
1032 [0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
1033 [0xEA] = 0x0119, // LATIN SMALL LETTER E WITH OGONEK
1034 [0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
1035 [0xEC] = 0x011B, // LATIN SMALL LETTER E WITH CARON
1036 [0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
1037 [0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
1038 [0xEF] = 0x010F, // LATIN SMALL LETTER D WITH CARON
1039 [0xF0] = 0x0111, // LATIN SMALL LETTER D WITH STROKE
1040 [0xF1] = 0x0144, // LATIN SMALL LETTER N WITH ACUTE
1041 [0xF2] = 0x0148, // LATIN SMALL LETTER N WITH CARON
1042 [0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
1043 [0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
1044 [0xF5] = 0x0151, // LATIN SMALL LETTER O WITH DOUBLE ACUTE
1045 [0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
1046 [0xF7] = 0x00F7, // DIVISION SIGN
1047 [0xF8] = 0x0159, // LATIN SMALL LETTER R WITH CARON
1048 [0xF9] = 0x016F, // LATIN SMALL LETTER U WITH RING ABOVE
1049 [0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
1050 [0xFB] = 0x0171, // LATIN SMALL LETTER U WITH DOUBLE ACUTE
1051 [0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
1052 [0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
1053 [0xFE] = 0x0163, // LATIN SMALL LETTER T WITH CEDILLA
1054 [0xFF] = 0x02D9, // DOT ABOVE
/*
 * ISO-8859-15 to Unicode: the array index is the 8-bit character value
 * and the element is the Unicode code point it stands for.  Every entry
 * maps to the same-numbered code point except 0xA4, 0xA6, 0xA8, 0xB4,
 * 0xB8, 0xBC, 0xBD and 0xBE, which are remapped (e.g. 0xA4 -> 0x20AC).
 * Per the source list at the top of this file, the data was transformed
 * from https://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT
 * edlib_init() registers the commands built on this table as the
 * fallback for any "iso-8859-" charset name and for "us-ascii".
 */
1057 static const wchar_t ISO_8859_15_UNICODE_TABLE[] = {
/* 0x00-0x7F: same-numbered code points */
1058 [0x00] = 0x0000, // NULL
1059 [0x01] = 0x0001, // START OF HEADING
1060 [0x02] = 0x0002, // START OF TEXT
1061 [0x03] = 0x0003, // END OF TEXT
1062 [0x04] = 0x0004, // END OF TRANSMISSION
1063 [0x05] = 0x0005, // ENQUIRY
1064 [0x06] = 0x0006, // ACKNOWLEDGE
1065 [0x07] = 0x0007, // BELL
1066 [0x08] = 0x0008, // BACKSPACE
1067 [0x09] = 0x0009, // HORIZONTAL TABULATION
1068 [0x0A] = 0x000A, // LINE FEED
1069 [0x0B] = 0x000B, // VERTICAL TABULATION
1070 [0x0C] = 0x000C, // FORM FEED
1071 [0x0D] = 0x000D, // CARRIAGE RETURN
1072 [0x0E] = 0x000E, // SHIFT OUT
1073 [0x0F] = 0x000F, // SHIFT IN
1074 [0x10] = 0x0010, // DATA LINK ESCAPE
1075 [0x11] = 0x0011, // DEVICE CONTROL ONE
1076 [0x12] = 0x0012, // DEVICE CONTROL TWO
1077 [0x13] = 0x0013, // DEVICE CONTROL THREE
1078 [0x14] = 0x0014, // DEVICE CONTROL FOUR
1079 [0x15] = 0x0015, // NEGATIVE ACKNOWLEDGE
1080 [0x16] = 0x0016, // SYNCHRONOUS IDLE
1081 [0x17] = 0x0017, // END OF TRANSMISSION BLOCK
1082 [0x18] = 0x0018, // CANCEL
1083 [0x19] = 0x0019, // END OF MEDIUM
1084 [0x1A] = 0x001A, // SUBSTITUTE
1085 [0x1B] = 0x001B, // ESCAPE
1086 [0x1C] = 0x001C, // FILE SEPARATOR
1087 [0x1D] = 0x001D, // GROUP SEPARATOR
1088 [0x1E] = 0x001E, // RECORD SEPARATOR
1089 [0x1F] = 0x001F, // UNIT SEPARATOR
1090 [0x20] = 0x0020, // SPACE
1091 [0x21] = 0x0021, // EXCLAMATION MARK
1092 [0x22] = 0x0022, // QUOTATION MARK
1093 [0x23] = 0x0023, // NUMBER SIGN
1094 [0x24] = 0x0024, // DOLLAR SIGN
1095 [0x25] = 0x0025, // PERCENT SIGN
1096 [0x26] = 0x0026, // AMPERSAND
1097 [0x27] = 0x0027, // APOSTROPHE
1098 [0x28] = 0x0028, // LEFT PARENTHESIS
1099 [0x29] = 0x0029, // RIGHT PARENTHESIS
1100 [0x2A] = 0x002A, // ASTERISK
1101 [0x2B] = 0x002B, // PLUS SIGN
1102 [0x2C] = 0x002C, // COMMA
1103 [0x2D] = 0x002D, // HYPHEN-MINUS
1104 [0x2E] = 0x002E, // FULL STOP
1105 [0x2F] = 0x002F, // SOLIDUS
1106 [0x30] = 0x0030, // DIGIT ZERO
1107 [0x31] = 0x0031, // DIGIT ONE
1108 [0x32] = 0x0032, // DIGIT TWO
1109 [0x33] = 0x0033, // DIGIT THREE
1110 [0x34] = 0x0034, // DIGIT FOUR
1111 [0x35] = 0x0035, // DIGIT FIVE
1112 [0x36] = 0x0036, // DIGIT SIX
1113 [0x37] = 0x0037, // DIGIT SEVEN
1114 [0x38] = 0x0038, // DIGIT EIGHT
1115 [0x39] = 0x0039, // DIGIT NINE
1116 [0x3A] = 0x003A, // COLON
1117 [0x3B] = 0x003B, // SEMICOLON
1118 [0x3C] = 0x003C, // LESS-THAN SIGN
1119 [0x3D] = 0x003D, // EQUALS SIGN
1120 [0x3E] = 0x003E, // GREATER-THAN SIGN
1121 [0x3F] = 0x003F, // QUESTION MARK
1122 [0x40] = 0x0040, // COMMERCIAL AT
1123 [0x41] = 0x0041, // LATIN CAPITAL LETTER A
1124 [0x42] = 0x0042, // LATIN CAPITAL LETTER B
1125 [0x43] = 0x0043, // LATIN CAPITAL LETTER C
1126 [0x44] = 0x0044, // LATIN CAPITAL LETTER D
1127 [0x45] = 0x0045, // LATIN CAPITAL LETTER E
1128 [0x46] = 0x0046, // LATIN CAPITAL LETTER F
1129 [0x47] = 0x0047, // LATIN CAPITAL LETTER G
1130 [0x48] = 0x0048, // LATIN CAPITAL LETTER H
1131 [0x49] = 0x0049, // LATIN CAPITAL LETTER I
1132 [0x4A] = 0x004A, // LATIN CAPITAL LETTER J
1133 [0x4B] = 0x004B, // LATIN CAPITAL LETTER K
1134 [0x4C] = 0x004C, // LATIN CAPITAL LETTER L
1135 [0x4D] = 0x004D, // LATIN CAPITAL LETTER M
1136 [0x4E] = 0x004E, // LATIN CAPITAL LETTER N
1137 [0x4F] = 0x004F, // LATIN CAPITAL LETTER O
1138 [0x50] = 0x0050, // LATIN CAPITAL LETTER P
1139 [0x51] = 0x0051, // LATIN CAPITAL LETTER Q
1140 [0x52] = 0x0052, // LATIN CAPITAL LETTER R
1141 [0x53] = 0x0053, // LATIN CAPITAL LETTER S
1142 [0x54] = 0x0054, // LATIN CAPITAL LETTER T
1143 [0x55] = 0x0055, // LATIN CAPITAL LETTER U
1144 [0x56] = 0x0056, // LATIN CAPITAL LETTER V
1145 [0x57] = 0x0057, // LATIN CAPITAL LETTER W
1146 [0x58] = 0x0058, // LATIN CAPITAL LETTER X
1147 [0x59] = 0x0059, // LATIN CAPITAL LETTER Y
1148 [0x5A] = 0x005A, // LATIN CAPITAL LETTER Z
1149 [0x5B] = 0x005B, // LEFT SQUARE BRACKET
1150 [0x5C] = 0x005C, // REVERSE SOLIDUS
1151 [0x5D] = 0x005D, // RIGHT SQUARE BRACKET
1152 [0x5E] = 0x005E, // CIRCUMFLEX ACCENT
1153 [0x5F] = 0x005F, // LOW LINE
1154 [0x60] = 0x0060, // GRAVE ACCENT
1155 [0x61] = 0x0061, // LATIN SMALL LETTER A
1156 [0x62] = 0x0062, // LATIN SMALL LETTER B
1157 [0x63] = 0x0063, // LATIN SMALL LETTER C
1158 [0x64] = 0x0064, // LATIN SMALL LETTER D
1159 [0x65] = 0x0065, // LATIN SMALL LETTER E
1160 [0x66] = 0x0066, // LATIN SMALL LETTER F
1161 [0x67] = 0x0067, // LATIN SMALL LETTER G
1162 [0x68] = 0x0068, // LATIN SMALL LETTER H
1163 [0x69] = 0x0069, // LATIN SMALL LETTER I
1164 [0x6A] = 0x006A, // LATIN SMALL LETTER J
1165 [0x6B] = 0x006B, // LATIN SMALL LETTER K
1166 [0x6C] = 0x006C, // LATIN SMALL LETTER L
1167 [0x6D] = 0x006D, // LATIN SMALL LETTER M
1168 [0x6E] = 0x006E, // LATIN SMALL LETTER N
1169 [0x6F] = 0x006F, // LATIN SMALL LETTER O
1170 [0x70] = 0x0070, // LATIN SMALL LETTER P
1171 [0x71] = 0x0071, // LATIN SMALL LETTER Q
1172 [0x72] = 0x0072, // LATIN SMALL LETTER R
1173 [0x73] = 0x0073, // LATIN SMALL LETTER S
1174 [0x74] = 0x0074, // LATIN SMALL LETTER T
1175 [0x75] = 0x0075, // LATIN SMALL LETTER U
1176 [0x76] = 0x0076, // LATIN SMALL LETTER V
1177 [0x77] = 0x0077, // LATIN SMALL LETTER W
1178 [0x78] = 0x0078, // LATIN SMALL LETTER X
1179 [0x79] = 0x0079, // LATIN SMALL LETTER Y
1180 [0x7A] = 0x007A, // LATIN SMALL LETTER Z
1181 [0x7B] = 0x007B, // LEFT CURLY BRACKET
1182 [0x7C] = 0x007C, // VERTICAL LINE
1183 [0x7D] = 0x007D, // RIGHT CURLY BRACKET
1184 [0x7E] = 0x007E, // TILDE
1185 [0x7F] = 0x007F, // DELETE
/* 0x80-0x9F: <control> entries, same-numbered code points */
1186 [0x80] = 0x0080, // <control>
1187 [0x81] = 0x0081, // <control>
1188 [0x82] = 0x0082, // <control>
1189 [0x83] = 0x0083, // <control>
1190 [0x84] = 0x0084, // <control>
1191 [0x85] = 0x0085, // <control>
1192 [0x86] = 0x0086, // <control>
1193 [0x87] = 0x0087, // <control>
1194 [0x88] = 0x0088, // <control>
1195 [0x89] = 0x0089, // <control>
1196 [0x8A] = 0x008A, // <control>
1197 [0x8B] = 0x008B, // <control>
1198 [0x8C] = 0x008C, // <control>
1199 [0x8D] = 0x008D, // <control>
1200 [0x8E] = 0x008E, // <control>
1201 [0x8F] = 0x008F, // <control>
1202 [0x90] = 0x0090, // <control>
1203 [0x91] = 0x0091, // <control>
1204 [0x92] = 0x0092, // <control>
1205 [0x93] = 0x0093, // <control>
1206 [0x94] = 0x0094, // <control>
1207 [0x95] = 0x0095, // <control>
1208 [0x96] = 0x0096, // <control>
1209 [0x97] = 0x0097, // <control>
1210 [0x98] = 0x0098, // <control>
1211 [0x99] = 0x0099, // <control>
1212 [0x9A] = 0x009A, // <control>
1213 [0x9B] = 0x009B, // <control>
1214 [0x9C] = 0x009C, // <control>
1215 [0x9D] = 0x009D, // <control>
1216 [0x9E] = 0x009E, // <control>
1217 [0x9F] = 0x009F, // <control>
/* 0xA0-0xFF: same-numbered code points apart from the eight remapped entries */
1218 [0xA0] = 0x00A0, // NO-BREAK SPACE
1219 [0xA1] = 0x00A1, // INVERTED EXCLAMATION MARK
1220 [0xA2] = 0x00A2, // CENT SIGN
1221 [0xA3] = 0x00A3, // POUND SIGN
1222 [0xA4] = 0x20AC, // EURO SIGN
1223 [0xA5] = 0x00A5, // YEN SIGN
1224 [0xA6] = 0x0160, // LATIN CAPITAL LETTER S WITH CARON
1225 [0xA7] = 0x00A7, // SECTION SIGN
1226 [0xA8] = 0x0161, // LATIN SMALL LETTER S WITH CARON
1227 [0xA9] = 0x00A9, // COPYRIGHT SIGN
1228 [0xAA] = 0x00AA, // FEMININE ORDINAL INDICATOR
1229 [0xAB] = 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
1230 [0xAC] = 0x00AC, // NOT SIGN
1231 [0xAD] = 0x00AD, // SOFT HYPHEN
1232 [0xAE] = 0x00AE, // REGISTERED SIGN
1233 [0xAF] = 0x00AF, // MACRON
1234 [0xB0] = 0x00B0, // DEGREE SIGN
1235 [0xB1] = 0x00B1, // PLUS-MINUS SIGN
1236 [0xB2] = 0x00B2, // SUPERSCRIPT TWO
1237 [0xB3] = 0x00B3, // SUPERSCRIPT THREE
1238 [0xB4] = 0x017D, // LATIN CAPITAL LETTER Z WITH CARON
1239 [0xB5] = 0x00B5, // MICRO SIGN
1240 [0xB6] = 0x00B6, // PILCROW SIGN
1241 [0xB7] = 0x00B7, // MIDDLE DOT
1242 [0xB8] = 0x017E, // LATIN SMALL LETTER Z WITH CARON
1243 [0xB9] = 0x00B9, // SUPERSCRIPT ONE
1244 [0xBA] = 0x00BA, // MASCULINE ORDINAL INDICATOR
1245 [0xBB] = 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
1246 [0xBC] = 0x0152, // LATIN CAPITAL LIGATURE OE
1247 [0xBD] = 0x0153, // LATIN SMALL LIGATURE OE
1248 [0xBE] = 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
1249 [0xBF] = 0x00BF, // INVERTED QUESTION MARK
1250 [0xC0] = 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
1251 [0xC1] = 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
1252 [0xC2] = 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
1253 [0xC3] = 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
1254 [0xC4] = 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
1255 [0xC5] = 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
1256 [0xC6] = 0x00C6, // LATIN CAPITAL LETTER AE
1257 [0xC7] = 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
1258 [0xC8] = 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
1259 [0xC9] = 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
1260 [0xCA] = 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
1261 [0xCB] = 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
1262 [0xCC] = 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
1263 [0xCD] = 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
1264 [0xCE] = 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
1265 [0xCF] = 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
1266 [0xD0] = 0x00D0, // LATIN CAPITAL LETTER ETH
1267 [0xD1] = 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
1268 [0xD2] = 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
1269 [0xD3] = 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
1270 [0xD4] = 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
1271 [0xD5] = 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
1272 [0xD6] = 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
1273 [0xD7] = 0x00D7, // MULTIPLICATION SIGN
1274 [0xD8] = 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
1275 [0xD9] = 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
1276 [0xDA] = 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
1277 [0xDB] = 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
1278 [0xDC] = 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
1279 [0xDD] = 0x00DD, // LATIN CAPITAL LETTER Y WITH ACUTE
1280 [0xDE] = 0x00DE, // LATIN CAPITAL LETTER THORN
1281 [0xDF] = 0x00DF, // LATIN SMALL LETTER SHARP S
1282 [0xE0] = 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
1283 [0xE1] = 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
1284 [0xE2] = 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
1285 [0xE3] = 0x00E3, // LATIN SMALL LETTER A WITH TILDE
1286 [0xE4] = 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
1287 [0xE5] = 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
1288 [0xE6] = 0x00E6, // LATIN SMALL LETTER AE
1289 [0xE7] = 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
1290 [0xE8] = 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
1291 [0xE9] = 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
1292 [0xEA] = 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
1293 [0xEB] = 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
1294 [0xEC] = 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
1295 [0xED] = 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
1296 [0xEE] = 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
1297 [0xEF] = 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
1298 [0xF0] = 0x00F0, // LATIN SMALL LETTER ETH
1299 [0xF1] = 0x00F1, // LATIN SMALL LETTER N WITH TILDE
1300 [0xF2] = 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
1301 [0xF3] = 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
1302 [0xF4] = 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
1303 [0xF5] = 0x00F5, // LATIN SMALL LETTER O WITH TILDE
1304 [0xF6] = 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
1305 [0xF7] = 0x00F7, // DIVISION SIGN
1306 [0xF8] = 0x00F8, // LATIN SMALL LETTER O WITH STROKE
1307 [0xF9] = 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
1308 [0xFA] = 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
1309 [0xFB] = 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
1310 [0xFC] = 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
1311 [0xFD] = 0x00FD, // LATIN SMALL LETTER Y WITH ACUTE
1312 [0xFE] = 0x00FE, // LATIN SMALL LETTER THORN
1313 [0xFF] = 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
/*
 * Key map shared by every charset pane.  It is allocated in
 * edlib_init(), which binds "doc:char" and "doc:content" in it.
 * charset_handle is the command each *_attach handler passes to
 * pane_register(); presumably DEF_LOOKUP_CMD makes it dispatch
 * through charset_map - confirm against the macro definition.
 */
1316 static struct map *charset_map safe;
1317 DEF_LOOKUP_CMD(charset_handle, charset_map);
/*
 * "doc:char" handler for a charset pane (bound in edlib_init()).
 *
 * The request is passed to the parent pane as "doc:byte" with the same
 * num, mark, num2 and mark2.  When no mark2 was supplied and the result
 * is positive and not CHAR_RET(WEOF), the result is replaced by
 * CHAR_RET() of the table entry indexed by its low 8 bits.  The table
 * is the pane's ->data pointer, which the *_attach commands set to one
 * of the *_UNICODE_TABLE arrays.
 *
 * NOTE(review): the declaration of 'ret' and the final return are on
 * lines not visible in this listing - presumably 'ret' is returned.
 */
1319 DEF_CMD(charset_char)
1322 wchar_t *tbl = ci->home->data;
1324 ret = home_call(ci->home->parent, "doc:byte", ci->focus,
1325 ci->num, ci->mark, NULL,
1326 ci->num2, ci->mark2);
/* Only translate a real byte: skip when mark2 is given, at WEOF, or on a non-positive result */
1328 if (!ci->mark2 && ret != CHAR_RET(WEOF) && ret >0)
1329 ret = CHAR_RET(tbl[ret & 0xff]);
/* Command that charset_content_cb() forwards the translated content to. */
1335 struct command *cb safe;
/* Pane handed to ->cb with each forwarded call. */
1336 struct pane *p safe;
/*
 * Callback that charset_content() hands to the parent's content
 * handler.  The enclosing struct charsetcb is recovered from ci->comm.
 *
 * ci->num carries one byte and ci->str/ci->num2 may carry further
 * bytes.  The byte in ci->num is always translated through c->tbl
 * before being forwarded to c->cb.
 *
 * - With no ci->str, a non-positive ci->num2, or c->noalloc set, the
 *   call is forwarded with ci->str and ci->num2 passed through as-is.
 * - Otherwise the bytes of ci->str are translated through c->tbl into
 *   a buffer (sized at twice ci->num2, with a cap applied above 1M)
 *   and the buffer and its length are forwarded instead.
 *
 * The value returned by c->cb is then examined: "rv >= b.len + 1" means
 * everything that was decoded was consumed; otherwise the bytes are
 * decoded again until the buffer length reaches rv-1, which maps the
 * consumed length back to a count of source bytes.
 *
 * NOTE(review): the local declarations and the values actually returned
 * in each case are on lines not visible in this listing - confirm.
 */
1341 DEF_CB(charset_content_cb)
1343 struct charsetcb *c = container_of(ci->comm, struct charsetcb, c);
1347 if (!ci->str || ci->num2 <= 0 || c->noalloc)
1348 return comm_call(c->cb, ci->key, c->p,
1349 c->tbl[ci->num & 0xff], ci->mark, ci->str,
1350 ci->num2, NULL, NULL,
1352 /* Buffer for utf8 content could be as much as 4 times ->str,
1353 * but that is unlikely. Allocate room for double, up to 1M.
1355 bsize = ci->num2 * 2;
1356 if (bsize > 1024*1024)
1359 buf_resize(&b, bsize);
1360 for (i = 0; i < ci->num2 && b.len < 1024*1024-2; i++) {
1361 unsigned char cc = ci->str[i];
1362 buf_append(&b, c->tbl[cc]);
1364 rv = comm_call(c->cb, ci->key, c->p,
1365 c->tbl[ci->num & 0xff], ci->mark,
1366 buf_final(&b), b.len,
1367 NULL, NULL, ci->x, 0);
1369 /* None of the extra was consumed. Assume that will continue */
1374 if (rv >= b.len + 1) {
1375 /* All of the extra (that we decoded) was consumed */
1379 /* Only some was consumed. We needed to map back to number of bytes. */
1381 for (i = 0; i < ci->num2 && b.len < (rv-1); i++)
1382 buf_append(&b, c->tbl[(unsigned char)ci->str[i]]);
/*
 * "doc:content" handler for a charset pane (bound in edlib_init()).
 *
 * Both ci->comm2 and ci->mark are required.  A struct charsetcb is set
 * up with charset_content_cb as its command, and the same key is then
 * called on the parent pane with that callback in place of the
 * caller's, so content passes through charset_content_cb on its way
 * back.  The translation table is this pane's ->data pointer.
 *
 * NOTE(review): the result returned when comm2/mark is missing and the
 * initialisation of the remaining charsetcb fields are on lines not
 * visible in this listing - confirm.
 */
1387 DEF_CMD(charset_content)
1390 wchar_t *tbl = ci->home->data;
1392 if (!ci->comm2 || !ci->mark)
1395 c.c = charset_content_cb;
1400 return home_call_comm(ci->home->parent, ci->key, ci->home,
1401 &c.c, 0, ci->mark, NULL, 0, ci->mark2);
/*
 * Shared implementation of the *_to_utf8 commands.
 *
 * @ci:  the command invocation; ci->comm2 receives the result.
 * @tbl: table mapping a byte value to the code point to emit.
 *
 * Each byte read through 's' is masked with 0xff, looked up in @tbl and
 * appended to buffer 'b'; the finished buffer is then handed to
 * ci->comm2 in a "cb" call on ci->focus.  Nothing is converted when
 * 's' or ci->comm2 is missing.
 *
 * NOTE(review): where 's' comes from (presumably ci->str), the loop
 * around buf_append() and the return values are on lines not visible
 * in this listing - confirm.
 */
1404 static int charset_to_utf8(const struct cmd_info *ci safe, const wchar_t tbl[])
1410 if (!s || !ci->comm2)
1414 buf_append(&b, tbl[*s & 0xff]);
1417 comm_call(ci->comm2, "cb", ci->focus, 0, NULL, buf_final(&b));
/*
 * String conversion using the Windows-1251 table.  edlib_init()
 * registers this for the "charset-to-utf8-windows-" command prefix.
 */
1422 DEF_CMD(win1251_to_utf8)
1424 return charset_to_utf8(ci, WIN1251_UNICODE_TABLE);
/*
 * String conversion using the Windows-1252 table.  edlib_init()
 * registers this as "charset-to-utf8-windows-1252".
 */
1427 DEF_CMD(win1252_to_utf8)
1429 return charset_to_utf8(ci, WIN1252_UNICODE_TABLE);
/*
 * String conversion using the ISO-8859-1 table.  edlib_init()
 * registers this as "charset-to-utf8-iso-8859-1".
 */
1432 DEF_CMD(iso8859_1_to_utf8)
1434 return charset_to_utf8(ci, ISO_8859_1_UNICODE_TABLE);
/*
 * String conversion using the ISO-8859-2 table.  edlib_init()
 * registers this as "charset-to-utf8-iso-8859-2".
 */
1437 DEF_CMD(iso8859_2_to_utf8)
1439 return charset_to_utf8(ci, ISO_8859_2_UNICODE_TABLE);
/*
 * String conversion using the ISO-8859-15 table.  edlib_init()
 * registers this for the "charset-to-utf8-iso-8859-" command prefix
 * and as "charset-to-utf8-us-ascii".
 */
1442 DEF_CMD(iso8859_15_to_utf8)
1444 return charset_to_utf8(ci, ISO_8859_15_UNICODE_TABLE);
/*
 * Attach a Windows-1251 charset pane.  A pane handled by charset_handle
 * is registered on ci->focus with the Windows-1251 table as its data
 * (the const qualifier is cast away for pane_register()), and the new
 * pane is reported to ci->comm2 in a "cb" call.  edlib_init() registers
 * this for the "attach-charset-windows-" command prefix.
 *
 * NOTE(review): the declaration of 'p' and any check that registration
 * succeeded are on lines not visible in this listing - confirm.
 */
1447 DEF_CMD(win1251_attach)
1451 p = pane_register(ci->focus, 0, &charset_handle.c,
1452 (wchar_t*) WIN1251_UNICODE_TABLE);
1456 return comm_call(ci->comm2, "cb", p);
/*
 * Attach a Windows-1252 charset pane.  A pane handled by charset_handle
 * is registered on ci->focus with the Windows-1252 table as its data
 * (the const qualifier is cast away for pane_register()), and the new
 * pane is reported to ci->comm2 in a "cb" call.  edlib_init() registers
 * this as "attach-charset-windows-1252".
 *
 * NOTE(review): the declaration of 'p' and any check that registration
 * succeeded are on lines not visible in this listing - confirm.
 */
1459 DEF_CMD(win1252_attach)
1463 p = pane_register(ci->focus, 0, &charset_handle.c,
1464 (wchar_t*)WIN1252_UNICODE_TABLE);
1468 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-1 charset pane.  A pane handled by charset_handle
 * is registered on ci->focus with the ISO-8859-1 table as its data
 * (the const qualifier is cast away for pane_register()), and the new
 * pane is reported to ci->comm2 in a "cb" call.  edlib_init() registers
 * this as "attach-charset-iso-8859-1".
 *
 * NOTE(review): the declaration of 'p' and any check that registration
 * succeeded are on lines not visible in this listing - confirm.
 */
1471 DEF_CMD(iso8859_1_attach)
1475 p = pane_register(ci->focus, 0, &charset_handle.c,
1476 (wchar_t*)ISO_8859_1_UNICODE_TABLE);
1480 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-2 charset pane.  A pane handled by charset_handle
 * is registered on ci->focus with the ISO-8859-2 table as its data
 * (the const qualifier is cast away for pane_register()), and the new
 * pane is reported to ci->comm2 in a "cb" call.  edlib_init() registers
 * this as "attach-charset-iso-8859-2".
 *
 * NOTE(review): the declaration of 'p' and any check that registration
 * succeeded are on lines not visible in this listing - confirm.
 */
1483 DEF_CMD(iso8859_2_attach)
1487 p = pane_register(ci->focus, 0, &charset_handle.c,
1488 (wchar_t*)ISO_8859_2_UNICODE_TABLE);
1492 return comm_call(ci->comm2, "cb", p);
/*
 * Attach an ISO-8859-15 charset pane.  A pane handled by charset_handle
 * is registered on ci->focus with the ISO-8859-15 table as its data
 * (the const qualifier is cast away for pane_register()), and the new
 * pane is reported to ci->comm2 in a "cb" call.  edlib_init() registers
 * this for the "attach-charset-iso-8859-" command prefix and as
 * "attach-charset-us-ascii".
 *
 * NOTE(review): the declaration of 'p' and any check that registration
 * succeeded are on lines not visible in this listing - confirm.
 */
1495 DEF_CMD(iso8859_15_attach)
1499 p = pane_register(ci->focus, 0, &charset_handle.c,
1500 (wchar_t*)ISO_8859_15_UNICODE_TABLE);
1504 return comm_call(ci->comm2, "cb", p);
/*
 * Module initialisation.
 *
 * @ed: pane on which the global commands are registered.
 *
 * Allocates charset_map and binds the two pane handlers ("doc:char" and
 * "doc:content") in it, then registers an "attach-charset-<name>" and a
 * "charset-to-utf8-<name>" command for each supported charset:
 *   - windows-1252, iso-8859-1 and iso-8859-2 by exact name;
 *   - the "windows-" prefix, served by the Windows-1251 commands;
 *   - the "iso-8859-" prefix and "us-ascii", served by the ISO-8859-15
 *     commands.
 * NOTE(review): presumably an exact-name registration takes precedence
 * over a matching prefix registration - confirm against the
 * "global-set-command-prefix" implementation.
 */
1507 void edlib_init(struct pane *ed safe)
1509 charset_map = key_alloc();
1511 key_add(charset_map, "doc:char", &charset_char);
1512 key_add(charset_map, "doc:content", &charset_content);
1513 /* No doc:content-bytes - that wouldn't make sense */
1515 /* Use 1251 for any unknown 'windows' charset */
1516 call_comm("global-set-command-prefix", ed, &win1251_attach,
1517 0, NULL, "attach-charset-windows-");
1518 call_comm("global-set-command-prefix", ed, &win1251_to_utf8,
1519 0, NULL, "charset-to-utf8-windows-");
1521 call_comm("global-set-command", ed, &win1252_attach, 0, NULL,
1522 "attach-charset-windows-1252");
1523 call_comm("global-set-command", ed, &win1252_to_utf8, 0, NULL,
1524 "charset-to-utf8-windows-1252");
1526 /* Use iso-8859-15 for any unknown iso-8859, and for ascii */
1527 call_comm("global-set-command-prefix", ed, &iso8859_15_attach,
1528 0, NULL, "attach-charset-iso-8859-");
1529 call_comm("global-set-command-prefix", ed, &iso8859_15_to_utf8,
1530 0, NULL, "charset-to-utf8-iso-8859-");
1532 call_comm("global-set-command", ed, &iso8859_15_attach, 0, NULL,
1533 "attach-charset-us-ascii");
1534 call_comm("global-set-command", ed, &iso8859_15_to_utf8, 0, NULL,
1535 "charset-to-utf8-us-ascii");
1537 call_comm("global-set-command", ed, &iso8859_1_attach, 0, NULL,
1538 "attach-charset-iso-8859-1");
1539 call_comm("global-set-command", ed, &iso8859_1_to_utf8, 0, NULL,
1540 "charset-to-utf8-iso-8859-1");
1542 call_comm("global-set-command", ed, &iso8859_2_attach, 0, NULL,
1543 "attach-charset-iso-8859-2");
1544 call_comm("global-set-command", ed, &iso8859_2_to_utf8, 0, NULL,
1545 "charset-to-utf8-iso-8859-2");