aboutsummaryrefslogtreecommitdiffstats
path: root/src/libs/utils/textcodec.cpp
blob: c1ccb8929c94d40555834604c996c11148b0215b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// Copyright (C) 2025 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

#include "textcodec.h"

#include "qtcassert.h"

#include <QHash>

#include <set>

namespace Utils {

// TextEncoding

// Maps an encoding name as given by the caller to the name that a
// QStringDecoder created for it reports via name().
//
// "System" is resolved through QStringConverter::System; every other name is
// handed to QStringDecoder as-is. Results are remembered in a function-local
// hash so that a decoder is only constructed once per distinct input.
//
// Returns the input unchanged (after a soft assert) when it is empty, and an
// empty QByteArray (after a soft assert) when no valid decoder with a
// non-empty name could be created.
//
// NOTE(review): the cache is a mutable static that is modified here without
// any locking visible in this function - confirm callers stay on one thread.
static QByteArray canonicalName(const QByteArray &input)
{
    QTC_ASSERT(!input.isEmpty(), return input);

    // Avoid construction of too many QStringDecoders to get canonical names.
    static QHash<QByteArray, QByteArray> s_canonicalNames {
        // FIXME: We can save a few more cycles by pre-populating the well-known ones
        // here once the transition off QTextCodec is finished. For now leave it in
        // to exercise the code paths below for better test coverage.
        // {"utf-8", "UTF-8" },
        // {"UTF-8", "UTF-8" },
        // {"iso-8859-1", "ISO-8859-1"},
        // {"ISO-8859-1", "ISO-8859-1"},
    };

    // Fast path: this spelling was resolved before.
    if (const auto cached = s_canonicalNames.constFind(input);
        cached != s_canonicalNames.constEnd()) {
        return cached.value();
    }

    // "System" is not looked up by name; it maps to the System encoding enum value.
    if (input == "System") {
        QStringDecoder localeDecoder(QStringConverter::System);
        QTC_CHECK(localeDecoder.isValid());
        const QByteArray localeName = localeDecoder.name();
        QTC_CHECK(!localeName.isEmpty());
        // Cached even if the checks above complained.
        s_canonicalNames.insert(input, localeName);
        return localeName;
    }

    // Anything else: ask a decoder constructed from the name. name() is only
    // queried on a valid decoder.
    const QStringDecoder namedDecoder(input);
    const QByteArray resolved = namedDecoder.isValid() ? QByteArray(namedDecoder.name())
                                                       : QByteArray();
    if (resolved.isEmpty()) {
        // Unknown or nameless encoding: report it and do not cache the failure.
        QTC_CHECK(false);
        return {};
    }

    s_canonicalNames.insert(input, resolved);
    return resolved;
}

// Default constructor. NOTE(review): presumably leaves m_name empty, which
// isValid() reports as invalid - confirm against the member declaration.
TextEncoding::TextEncoding() = default;

TextEncoding::TextEncoding(const QByteArray &name)
    : m_name(canonicalName(name))
{}

TextEncoding::TextEncoding(QStringConverter::Encoding encoding)
    : m_name(QStringConverter::nameForEncoding(encoding))
{}

// An encoding counts as valid as soon as it carries a non-empty name.
bool TextEncoding::isValid() const
{
    const bool hasName = !m_name.isEmpty();
    return hasName;
}

// Returns the stored encoding name as a QString, or the placeholder text
// "Null codec" when the encoding is not valid.
QString TextEncoding::displayName() const
{
    if (!isValid())
        return QString("Null codec");

    return QString::fromLatin1(m_name);
}

// Returns the display name, intended to be followed by the known aliases of
// the encoding. The alias part is currently compiled out, so the result is
// identical to displayName().
QString TextEncoding::fullDisplayName() const
{
    QString fullName = displayName();

#if 0
    // FIXME: There is no replacement for QTextCodec::aliases() in the
    // QStringConverter world (yet?).
    QTextCodec *codec = m_name == QStringEncoder::nameForEncoding(QStringConverter::System)
                            ? QTextCodec::codecForLocale()
                            : QTextCodec::codecForName(m_name);

    if (codec) {
        for (const QByteArray &alias : codec->aliases()) {
            fullName += QLatin1String(" / ");
            fullName += QString::fromLatin1(alias);
        }
    }
#endif

    return fullName;
}

// True when the stored name is exactly "UTF-8" (case-sensitive comparison
// against the stored name).
bool TextEncoding::isUtf8() const
{
    static constexpr char utf8Name[] = "UTF-8";
    return m_name == utf8Name;
}

// Converts bytes in this encoding to a QString, using a fresh QStringDecoder
// created from the stored name for each call.
QString TextEncoding::decode(QByteArrayView encoded) const
{
    QStringDecoder decoder(m_name);
    return decoder.decode(encoded);
}

// Converts a string to bytes in this encoding, using a fresh QStringEncoder
// created from the stored name for each call.
QByteArray TextEncoding::encode(QStringView decoded) const
{
    QStringEncoder encoder(m_name);
    return encoder.encode(decoded);
}

// Two encodings are equal when their names are equal.
bool operator==(const TextEncoding &left, const TextEncoding &right)
{
    const bool sameName = left.name() == right.name();
    return sameName;
}

// Two encodings differ when their names differ.
bool operator!=(const TextEncoding &left, const TextEncoding &right)
{
    const bool sameName = left.name() == right.name();
    return !sameName;
}

// Returns the list of encodings offered to the user. The list is computed
// once, on first call, and the same object is returned afterwards.
//
// With Qt >= 6.7 the list is derived from QStringConverter::availableCodecs(),
// skipping entries for which no valid, named QStringEncoder can be created.
// With older Qt versions it consists of all built-in QStringConverter
// encodings, including the System encoding, so that both code paths offer
// the locale encoding.
const QList<TextEncoding> &TextEncoding::availableEncodings()
{
    static const QList<TextEncoding> theAvailableEncoding = [] {
        QList<TextEncoding> encodings;
#if QT_VERSION >= QT_VERSION_CHECK(6, 7, 0)
        std::set<QString> encodingNames;
        const QList<QString> codecs = QStringConverter::availableCodecs();
        for (const QString &name : codecs) {
            // Convert once; used for both the probe encoder and the final entry.
            const QByteArray rawName = name.toUtf8();
            // Drop encoders that don't even remember their names.
            QStringEncoder encoder(rawName);
            if (!encoder.isValid())
                continue;
            if (QByteArray(encoder.name()).isEmpty())
                continue;
            // Each codec name is expected only once; complain and skip repeats.
            const auto [_, inserted] = encodingNames.insert(name);
            QTC_ASSERT(inserted, continue);
            encodings.append(TextEncoding(rawName));
        }
#else
        // Before Qt 6.7, QStringConverter::availableCodecs did not exist,
        // even if Qt was built with ICU. Offer at least the well-known ones.
        // LastEncoding is itself a real encoding (System), so the upper bound
        // has to be inclusive.
        for (int enc = 0; enc <= QStringConverter::Encoding::LastEncoding; ++enc)
            encodings.append(TextEncoding(QStringConverter::Encoding(enc)));
#endif
        return encodings;
    }();
    return theAvailableEncoding;
}

// File-local encoding returned by TextEncoding::encodingForLocale(). Starts
// out as Qt's System encoding and can be replaced via setEncodingForLocale().
static TextEncoding theEncodingForLocale(QStringEncoder::System);

void TextEncoding::setEncodingForLocale(const QByteArray &codecName)
{
    theEncodingForLocale = codecName;
}

// Returns a copy of the file-local theEncodingForLocale, i.e. the System
// encoding unless setEncodingForLocale() has replaced it.
TextEncoding TextEncoding::encodingForLocale()
{
    return theEncodingForLocale;
}

} // namespace Utils