1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
#ifndef LH_ENCODINGS_H
#define LH_ENCODINGS_H
#include "types.h"
namespace litehtml
{
// https://encoding.spec.whatwg.org/#names-and-labels
// Identifiers for the character encodings this library distinguishes.
// The enumerator order is significant: the single-byte group has to line up
// with single_byte_indexes, so never insert or reorder entries casually.
enum class encoding
{
    // Indicates error or absence of encoding.
    null,

    utf_8,

    // Legacy single-byte encodings; must be in sync with single_byte_indexes
    ibm866,
    iso_8859_2, iso_8859_3, iso_8859_4, iso_8859_5,
    iso_8859_6, iso_8859_7, iso_8859_8, iso_8859_8_i,
    iso_8859_10, iso_8859_13, iso_8859_14, iso_8859_15, iso_8859_16,
    koi8_r, koi8_u,
    macintosh,
    windows_874,
    windows_1250, windows_1251, windows_1252,
    windows_1253, windows_1254, windows_1255,
    windows_1256, windows_1257, windows_1258,
    x_mac_cyrillic,

    // Legacy multi-byte East Asian encodings
    gbk, gb18030,
    big5,
    euc_jp, iso_2022_jp, shift_jis,
    euc_kr,

    // Legacy miscellaneous encodings
    replacement,
    utf_16be, utf_16le,
    x_user_defined
};
// https://html.spec.whatwg.org/multipage/parsing.html#concept-encoding-confidence
// Encoding confidence: records whether a chosen encoding is only a tentative
// guess or is considered certain.
enum class confidence // encoding confidence
{
    tentative,
    certain
    // A third state, "irrelevant", is deliberately left out: not used here.
};
// Used as argument for document::createFromString, parse_html and encoding_sniffing_algorithm.
// A string that additionally carries the encoding of its bytes and how
// confident the caller is about that encoding.
struct estring : string // string with encoding
{
    litehtml::encoding   encoding;   // encoding::null when none is specified
    litehtml::confidence confidence; // confidence::certain unless stated otherwise

    // Builds from a C string; the encoding is left as encoding::null and the
    // confidence is confidence::certain.
    estring(const char* str)
        : string(str),
          encoding(encoding::null),
          confidence(confidence::certain)
    {
    }

    // Builds from an existing string (copied), optionally tagging it with an
    // encoding and a confidence. Omitted arguments default to encoding::null
    // and confidence::certain respectively.
    estring(const string& str,
            litehtml::encoding encoding = encoding::null,
            litehtml::confidence confidence = confidence::certain)
        : string(str),
          encoding(encoding),
          confidence(confidence)
    {
    }
};
// Encoding detection / decoding entry points. Only the declarations live in
// this header; the definitions are elsewhere, so the notes below are
// reviewer assumptions drawn from the names and signatures — confirm against
// the implementation before relying on them.

// NOTE(review): presumably checks the start of str for a byte order mark and
// returns the corresponding encoding (likely encoding::null if none) — confirm.
encoding bom_sniff(const string& str);
// Takes the estring by non-const reference, so it can update str in place.
// NOTE(review): presumably fills in str.encoding / str.confidence — confirm.
void encoding_sniffing_algorithm(estring& str);
// NOTE(review): presumably maps an encoding label to its enumerator, with
// encoding::null for unknown labels — confirm. The label is taken by value.
encoding get_encoding(string label);
// NOTE(review): presumably parses the textual content of a meta element and
// returns the encoding it names — confirm. The argument is taken by value.
encoding extract_encoding_from_meta_element(string str);
// Decodes input, interpreted according to coding, writing the result into output.
// NOTE(review): output format (presumably UTF-8) is not visible here — confirm.
void decode(string input, encoding coding, string& output);
// Same operation as the overload above, but the result is returned by value.
string decode(string input, encoding coding);
} // namespace litehtml
#endif // LH_ENCODINGS_H
|