aboutsummaryrefslogtreecommitdiffstats
path: root/include/litehtml/encodings.h
blob: ec5cb03e831c16561c052650e81f5e6f30e5e1ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#ifndef LH_ENCODINGS_H
#define LH_ENCODINGS_H

#include "types.h"
namespace litehtml
{

// Identifiers for the character encodings this library distinguishes.
// Enumerator names follow the encoding names of the WHATWG Encoding Standard:
// https://encoding.spec.whatwg.org/#names-and-labels
//
// The enumerators are grouped by the section comments below. Do not reorder
// them casually: at least the single-byte group is order-sensitive (see its
// comment).
enum class encoding
{
	null,  // indicates error or absence of encoding
	utf_8,

	// Legacy single-byte encodings; must be in sync with single_byte_indexes
	// (single_byte_indexes is defined outside this header; adding, removing or
	// reordering enumerators in this group requires the matching change there).
	ibm866,
	iso_8859_2,
	iso_8859_3,
	iso_8859_4,
	iso_8859_5,
	iso_8859_6,
	iso_8859_7,
	iso_8859_8,
	iso_8859_8_i,
	iso_8859_10,
	iso_8859_13,
	iso_8859_14,
	iso_8859_15,
	iso_8859_16,
	koi8_r,
	koi8_u,
	macintosh,
	windows_874,
	windows_1250,
	windows_1251,
	windows_1252,
	windows_1253,
	windows_1254,
	windows_1255,
	windows_1256,
	windows_1257,
	windows_1258,
	x_mac_cyrillic,

	// Legacy multi-byte East Asian encodings
	gbk,
	gb18030,
	big5,
	euc_jp,
	iso_2022_jp,
	shift_jis,
	euc_kr,

	// Legacy miscellaneous encodings
	replacement,
	utf_16be,
	utf_16le,
	x_user_defined
};

// Encoding confidence, as defined by the HTML Standard:
// https://html.spec.whatwg.org/multipage/parsing.html#concept-encoding-confidence
// Stored next to an encoding (see estring) to say how firmly that encoding
// is known.
enum class confidence // encoding confidence
{
	tentative,
	certain,
	// irrelevant // the spec's third state; not used here
};

// A string that carries its character encoding and the confidence in that
// encoding alongside the text itself.
// Used as argument for document::createFromString, parse_html and encoding_sniffing_algorithm.
//
// Both constructors are intentionally non-explicit, so a plain string or a
// C string literal converts to estring implicitly.
struct estring : string  // string with encoding
{
	// Defaults describe "encoding not specified" (encoding::null) held with
	// certain confidence; constructors override them only when told to.
	litehtml::encoding   encoding   = litehtml::encoding::null;
	litehtml::confidence confidence = litehtml::confidence::certain;

	// From a C string: the encoding and confidence keep their default values.
	estring(const char* str)
		: string(str)
	{
	}

	// From a string, optionally tagged with an explicit encoding and confidence.
	estring(const string& str,
	        litehtml::encoding encoding = litehtml::encoding::null,
	        litehtml::confidence confidence = litehtml::confidence::certain)
		: string(str)
		, encoding(encoding)
		, confidence(confidence)
	{
	}
};


// NOTE(review): only the declarations are visible in this header. The notes
// below are inferred from names and signatures; confirm against the definitions.

// Presumably inspects the start of `str` for a byte order mark and returns the
// corresponding encoding (encoding::null being the enum's "error or absence of
// encoding" value) -- TODO confirm.
encoding bom_sniff(const string& str);
// Takes the estring by non-const reference, so it may update it in place;
// presumably it determines str.encoding / str.confidence -- TODO confirm.
void encoding_sniffing_algorithm(estring& str);

// Presumably maps an encoding label to its encoding enumerator -- TODO confirm.
// `label` is taken by value.
encoding get_encoding(string label);
// Presumably extracts an encoding from the text of a <meta> element -- TODO confirm.
// `str` is taken by value.
encoding extract_encoding_from_meta_element(string str);

// Two overloads taking the same `input` and `coding`: the first delivers its
// result through the `output` reference, the second returns it. Presumably
// both convert `input` from `coding` into the library's internal string
// representation -- TODO confirm.
void decode(string input, encoding coding, string& output);
string decode(string input, encoding coding);

} // namespace litehtml

#endif // LH_ENCODINGS_H