/*
 * Copyright (C) 2000 Peter Kelly <[email protected]>
 * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2006 Alexey Proskuryakov <[email protected]>
 * Copyright (C) 2007 Samuel Weinig <[email protected]>
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 * Copyright (C) 2008 Holger Hans Peter Freyther
 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
 * Copyright (C) 2010 Patrick Gansterer <[email protected]>
 * Copyright (C) 2013 Samsung Electronics. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
|
---|
27 |
|
---|
28 | #include "config.h"
|
---|
29 | #include "XMLDocumentParser.h"
|
---|
30 |
|
---|
31 | #include "CDATASection.h"
|
---|
32 | #include "Comment.h"
|
---|
33 | #include "CachedResourceLoader.h"
|
---|
34 | #include "CommonAtomStrings.h"
|
---|
35 | #include "Document.h"
|
---|
36 | #include "DocumentFragment.h"
|
---|
37 | #include "DocumentType.h"
|
---|
38 | #include "Frame.h"
|
---|
39 | #include "FrameDestructionObserverInlines.h"
|
---|
40 | #include "FrameLoader.h"
|
---|
41 | #include "HTMLEntityParser.h"
|
---|
42 | #include "HTMLHtmlElement.h"
|
---|
43 | #include "HTMLParserIdioms.h"
|
---|
44 | #include "HTMLTemplateElement.h"
|
---|
45 | #include "HTTPParsers.h"
|
---|
46 | #include "InlineClassicScript.h"
|
---|
47 | #include "MIMETypeRegistry.h"
|
---|
48 | #include "Page.h"
|
---|
49 | #include "PageConsoleClient.h"
|
---|
50 | #include "PendingScript.h"
|
---|
51 | #include "ProcessingInstruction.h"
|
---|
52 | #include "ResourceError.h"
|
---|
53 | #include "ResourceResponse.h"
|
---|
54 | #include "SVGElement.h"
|
---|
55 | #include "ScriptElement.h"
|
---|
56 | #include "ScriptSourceCode.h"
|
---|
57 | #include "Settings.h"
|
---|
58 | #include "SharedBuffer.h"
|
---|
59 | #include "StyleScope.h"
|
---|
60 | #include "TextResourceDecoder.h"
|
---|
61 | #include "TransformSource.h"
|
---|
62 | #include "XMLNSNames.h"
|
---|
63 | #include "XMLDocumentParserScope.h"
|
---|
64 | #include <libxml/parserInternals.h>
|
---|
65 | #include <wtf/unicode/CharacterNames.h>
|
---|
66 | #include <wtf/unicode/UTF8Conversion.h>
|
---|
67 |
|
---|
68 | #if ENABLE(XSLT)
|
---|
69 | #include "XMLTreeViewer.h"
|
---|
70 | #include <libxslt/xslt.h>
|
---|
71 | #endif
|
---|
72 |
|
---|
73 | namespace WebCore {
|
---|
74 |
|
---|
75 | #if ENABLE(XSLT)
|
---|
76 |
|
---|
77 | static inline bool shouldRenderInXMLTreeViewerMode(Document& document)
|
---|
78 | {
|
---|
79 | if (document.sawElementsInKnownNamespaces())
|
---|
80 | return false;
|
---|
81 |
|
---|
82 | if (document.transformSourceDocument())
|
---|
83 | return false;
|
---|
84 |
|
---|
85 | auto* frame = document.frame();
|
---|
86 | if (!frame)
|
---|
87 | return false;
|
---|
88 |
|
---|
89 | if (!frame->settings().developerExtrasEnabled())
|
---|
90 | return false;
|
---|
91 |
|
---|
92 | if (frame->tree().parent())
|
---|
93 | return false; // This document is not in a top frame
|
---|
94 |
|
---|
95 | return true;
|
---|
96 | }
|
---|
97 |
|
---|
98 | #endif
|
---|
99 |
|
---|
100 | class PendingCallbacks {
|
---|
101 | WTF_MAKE_FAST_ALLOCATED;
|
---|
102 | public:
|
---|
103 | void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** attributes)
|
---|
104 | {
|
---|
105 | auto callback = makeUnique<PendingStartElementNSCallback>();
|
---|
106 |
|
---|
107 | callback->xmlLocalName = xmlStrdup(xmlLocalName);
|
---|
108 | callback->xmlPrefix = xmlStrdup(xmlPrefix);
|
---|
109 | callback->xmlURI = xmlStrdup(xmlURI);
|
---|
110 | callback->numNamespaces = numNamespaces;
|
---|
111 | callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numNamespaces * 2));
|
---|
112 | for (int i = 0; i < numNamespaces * 2 ; i++)
|
---|
113 | callback->namespaces[i] = xmlStrdup(namespaces[i]);
|
---|
114 | callback->numAttributes = numAttributes;
|
---|
115 | callback->numDefaulted = numDefaulted;
|
---|
116 | callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numAttributes * 5));
|
---|
117 | for (int i = 0; i < numAttributes; i++) {
|
---|
118 | // Each attribute has 5 elements in the array:
|
---|
119 | // name, prefix, uri, value and an end pointer.
|
---|
120 |
|
---|
121 | for (int j = 0; j < 3; j++)
|
---|
122 | callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
|
---|
123 |
|
---|
124 | int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
|
---|
125 |
|
---|
126 | callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
|
---|
127 | callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
|
---|
128 | }
|
---|
129 |
|
---|
130 | m_callbacks.append(WTFMove(callback));
|
---|
131 | }
|
---|
132 |
|
---|
133 | void appendEndElementNSCallback()
|
---|
134 | {
|
---|
135 | m_callbacks.append(makeUnique<PendingEndElementNSCallback>());
|
---|
136 | }
|
---|
137 |
|
---|
138 | void appendCharactersCallback(const xmlChar* s, int len)
|
---|
139 | {
|
---|
140 | auto callback = makeUnique<PendingCharactersCallback>();
|
---|
141 |
|
---|
142 | callback->s = xmlStrndup(s, len);
|
---|
143 | callback->len = len;
|
---|
144 |
|
---|
145 | m_callbacks.append(WTFMove(callback));
|
---|
146 | }
|
---|
147 |
|
---|
148 | void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
|
---|
149 | {
|
---|
150 | auto callback = makeUnique<PendingProcessingInstructionCallback>();
|
---|
151 |
|
---|
152 | callback->target = xmlStrdup(target);
|
---|
153 | callback->data = xmlStrdup(data);
|
---|
154 |
|
---|
155 | m_callbacks.append(WTFMove(callback));
|
---|
156 | }
|
---|
157 |
|
---|
158 | void appendCDATABlockCallback(const xmlChar* s, int len)
|
---|
159 | {
|
---|
160 | auto callback = makeUnique<PendingCDATABlockCallback>();
|
---|
161 |
|
---|
162 | callback->s = xmlStrndup(s, len);
|
---|
163 | callback->len = len;
|
---|
164 |
|
---|
165 | m_callbacks.append(WTFMove(callback));
|
---|
166 | }
|
---|
167 |
|
---|
168 | void appendCommentCallback(const xmlChar* s)
|
---|
169 | {
|
---|
170 | auto callback = makeUnique<PendingCommentCallback>();
|
---|
171 |
|
---|
172 | callback->s = xmlStrdup(s);
|
---|
173 |
|
---|
174 | m_callbacks.append(WTFMove(callback));
|
---|
175 | }
|
---|
176 |
|
---|
177 | void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
|
---|
178 | {
|
---|
179 | auto callback = makeUnique<PendingInternalSubsetCallback>();
|
---|
180 |
|
---|
181 | callback->name = xmlStrdup(name);
|
---|
182 | callback->externalID = xmlStrdup(externalID);
|
---|
183 | callback->systemID = xmlStrdup(systemID);
|
---|
184 |
|
---|
185 | m_callbacks.append(WTFMove(callback));
|
---|
186 | }
|
---|
187 |
|
---|
188 | void appendErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
|
---|
189 | {
|
---|
190 | auto callback = makeUnique<PendingErrorCallback>();
|
---|
191 |
|
---|
192 | callback->message = xmlStrdup(message);
|
---|
193 | callback->type = type;
|
---|
194 | callback->lineNumber = lineNumber;
|
---|
195 | callback->columnNumber = columnNumber;
|
---|
196 |
|
---|
197 | m_callbacks.append(WTFMove(callback));
|
---|
198 | }
|
---|
199 |
|
---|
200 | void callAndRemoveFirstCallback(XMLDocumentParser* parser)
|
---|
201 | {
|
---|
202 | std::unique_ptr<PendingCallback> callback = m_callbacks.takeFirst();
|
---|
203 | callback->call(parser);
|
---|
204 | }
|
---|
205 |
|
---|
206 | bool isEmpty() const { return m_callbacks.isEmpty(); }
|
---|
207 |
|
---|
208 | private:
|
---|
209 | struct PendingCallback {
|
---|
210 | WTF_MAKE_STRUCT_FAST_ALLOCATED;
|
---|
211 | virtual ~PendingCallback() = default;
|
---|
212 | virtual void call(XMLDocumentParser* parser) = 0;
|
---|
213 | };
|
---|
214 |
|
---|
215 | struct PendingStartElementNSCallback : public PendingCallback {
|
---|
216 | virtual ~PendingStartElementNSCallback()
|
---|
217 | {
|
---|
218 | xmlFree(xmlLocalName);
|
---|
219 | xmlFree(xmlPrefix);
|
---|
220 | xmlFree(xmlURI);
|
---|
221 | for (int i = 0; i < numNamespaces * 2; i++)
|
---|
222 | xmlFree(namespaces[i]);
|
---|
223 | xmlFree(namespaces);
|
---|
224 | for (int i = 0; i < numAttributes; i++) {
|
---|
225 | for (int j = 0; j < 4; j++)
|
---|
226 | xmlFree(attributes[i * 5 + j]);
|
---|
227 | }
|
---|
228 | xmlFree(attributes);
|
---|
229 | }
|
---|
230 |
|
---|
231 | void call(XMLDocumentParser* parser) override
|
---|
232 | {
|
---|
233 | parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, const_cast<const xmlChar**>(namespaces), numAttributes, numDefaulted, const_cast<const xmlChar**>(attributes));
|
---|
234 | }
|
---|
235 |
|
---|
236 | xmlChar* xmlLocalName;
|
---|
237 | xmlChar* xmlPrefix;
|
---|
238 | xmlChar* xmlURI;
|
---|
239 | int numNamespaces;
|
---|
240 | xmlChar** namespaces;
|
---|
241 | int numAttributes;
|
---|
242 | int numDefaulted;
|
---|
243 | xmlChar** attributes;
|
---|
244 | };
|
---|
245 |
|
---|
246 | struct PendingEndElementNSCallback : public PendingCallback {
|
---|
247 | void call(XMLDocumentParser* parser) override
|
---|
248 | {
|
---|
249 | parser->endElementNs();
|
---|
250 | }
|
---|
251 | };
|
---|
252 |
|
---|
253 | struct PendingCharactersCallback : public PendingCallback {
|
---|
254 | virtual ~PendingCharactersCallback()
|
---|
255 | {
|
---|
256 | xmlFree(s);
|
---|
257 | }
|
---|
258 |
|
---|
259 | void call(XMLDocumentParser* parser) override
|
---|
260 | {
|
---|
261 | parser->characters(s, len);
|
---|
262 | }
|
---|
263 |
|
---|
264 | xmlChar* s;
|
---|
265 | int len;
|
---|
266 | };
|
---|
267 |
|
---|
268 | struct PendingProcessingInstructionCallback : public PendingCallback {
|
---|
269 | virtual ~PendingProcessingInstructionCallback()
|
---|
270 | {
|
---|
271 | xmlFree(target);
|
---|
272 | xmlFree(data);
|
---|
273 | }
|
---|
274 |
|
---|
275 | void call(XMLDocumentParser* parser) override
|
---|
276 | {
|
---|
277 | parser->processingInstruction(target, data);
|
---|
278 | }
|
---|
279 |
|
---|
280 | xmlChar* target;
|
---|
281 | xmlChar* data;
|
---|
282 | };
|
---|
283 |
|
---|
284 | struct PendingCDATABlockCallback : public PendingCallback {
|
---|
285 | virtual ~PendingCDATABlockCallback()
|
---|
286 | {
|
---|
287 | xmlFree(s);
|
---|
288 | }
|
---|
289 |
|
---|
290 | void call(XMLDocumentParser* parser) override
|
---|
291 | {
|
---|
292 | parser->cdataBlock(s, len);
|
---|
293 | }
|
---|
294 |
|
---|
295 | xmlChar* s;
|
---|
296 | int len;
|
---|
297 | };
|
---|
298 |
|
---|
299 | struct PendingCommentCallback : public PendingCallback {
|
---|
300 | virtual ~PendingCommentCallback()
|
---|
301 | {
|
---|
302 | xmlFree(s);
|
---|
303 | }
|
---|
304 |
|
---|
305 | void call(XMLDocumentParser* parser) override
|
---|
306 | {
|
---|
307 | parser->comment(s);
|
---|
308 | }
|
---|
309 |
|
---|
310 | xmlChar* s;
|
---|
311 | };
|
---|
312 |
|
---|
313 | struct PendingInternalSubsetCallback : public PendingCallback {
|
---|
314 | virtual ~PendingInternalSubsetCallback()
|
---|
315 | {
|
---|
316 | xmlFree(name);
|
---|
317 | xmlFree(externalID);
|
---|
318 | xmlFree(systemID);
|
---|
319 | }
|
---|
320 |
|
---|
321 | void call(XMLDocumentParser* parser) override
|
---|
322 | {
|
---|
323 | parser->internalSubset(name, externalID, systemID);
|
---|
324 | }
|
---|
325 |
|
---|
326 | xmlChar* name;
|
---|
327 | xmlChar* externalID;
|
---|
328 | xmlChar* systemID;
|
---|
329 | };
|
---|
330 |
|
---|
331 | struct PendingErrorCallback: public PendingCallback {
|
---|
332 | virtual ~PendingErrorCallback()
|
---|
333 | {
|
---|
334 | xmlFree(message);
|
---|
335 | }
|
---|
336 |
|
---|
337 | void call(XMLDocumentParser* parser) override
|
---|
338 | {
|
---|
339 | parser->handleError(type, reinterpret_cast<char*>(message), TextPosition(lineNumber, columnNumber));
|
---|
340 | }
|
---|
341 |
|
---|
342 | XMLErrors::ErrorType type;
|
---|
343 | xmlChar* message;
|
---|
344 | OrdinalNumber lineNumber;
|
---|
345 | OrdinalNumber columnNumber;
|
---|
346 | };
|
---|
347 |
|
---|
348 | Deque<std::unique_ptr<PendingCallback>> m_callbacks;
|
---|
349 | };
|
---|
350 | // --------------------------------
|
---|
351 |
|
---|
352 | static int globalDescriptor = 0;
|
---|
353 | static Thread* libxmlLoaderThread { nullptr };
|
---|
354 |
|
---|
355 | static int matchFunc(const char*)
|
---|
356 | {
|
---|
357 | // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
|
---|
358 | // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
|
---|
359 | return XMLDocumentParserScope::currentCachedResourceLoader && libxmlLoaderThread == &Thread::current();
|
---|
360 | }
|
---|
361 |
|
---|
362 | class OffsetBuffer {
|
---|
363 | WTF_MAKE_FAST_ALLOCATED;
|
---|
364 | public:
|
---|
365 | OffsetBuffer(Vector<uint8_t>&& buffer)
|
---|
366 | : m_buffer(WTFMove(buffer))
|
---|
367 | {
|
---|
368 | }
|
---|
369 |
|
---|
370 | int readOutBytes(char* outputBuffer, unsigned askedToRead)
|
---|
371 | {
|
---|
372 | unsigned bytesLeft = m_buffer.size() - m_currentOffset;
|
---|
373 | unsigned lenToCopy = std::min(askedToRead, bytesLeft);
|
---|
374 | if (lenToCopy) {
|
---|
375 | memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
|
---|
376 | m_currentOffset += lenToCopy;
|
---|
377 | }
|
---|
378 | return lenToCopy;
|
---|
379 | }
|
---|
380 |
|
---|
381 | private:
|
---|
382 | Vector<uint8_t> m_buffer;
|
---|
383 | unsigned m_currentOffset { 0 };
|
---|
384 | };
|
---|
385 |
|
---|
386 | static bool externalEntityMimeTypeAllowed(const ResourceResponse& response)
|
---|
387 | {
|
---|
388 | String contentType = response.httpHeaderField(HTTPHeaderName::ContentType);
|
---|
389 | String mimeType = extractMIMETypeFromMediaType(contentType);
|
---|
390 | if (mimeType.isEmpty()) {
|
---|
391 | // Same logic as XMLHttpRequest::responseMIMEType(). Keep them in sync.
|
---|
392 | if (response.isInHTTPFamily())
|
---|
393 | mimeType = contentType;
|
---|
394 | else
|
---|
395 | mimeType = response.mimeType();
|
---|
396 | }
|
---|
397 | return MIMETypeRegistry::isXMLMIMEType(mimeType) || MIMETypeRegistry::isXMLEntityMIMEType(mimeType);
|
---|
398 | }
|
---|
399 |
|
---|
400 | static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
|
---|
401 | {
|
---|
402 | if (!scriptingContentIsAllowed(parserContentPolicy))
|
---|
403 | element->stripScriptingAttributes(attributeVector);
|
---|
404 | element->parserSetAttributes(attributeVector);
|
---|
405 | }
|
---|
406 |
|
---|
407 | static void switchToUTF16(xmlParserCtxtPtr ctxt)
|
---|
408 | {
|
---|
409 | // Hack around libxml2's lack of encoding overide support by manually
|
---|
410 | // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
|
---|
411 | // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
|
---|
412 | // and switch encodings, causing the parse to fail.
|
---|
413 |
|
---|
414 | // FIXME: Can we just use XML_PARSE_IGNORE_ENC now?
|
---|
415 |
|
---|
416 | const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&byteOrderMark);
|
---|
417 | xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
|
---|
418 | }
|
---|
419 |
|
---|
420 | static bool shouldAllowExternalLoad(const URL& url)
|
---|
421 | {
|
---|
422 | String urlString = url.string();
|
---|
423 |
|
---|
424 | // On non-Windows platforms libxml asks for this URL, the "XML_XML_DEFAULT_CATALOG", on initialization.
|
---|
425 | if (urlString == "file:///etc/xml/catalog"_s)
|
---|
426 | return false;
|
---|
427 |
|
---|
428 | // On Windows, libxml computes a URL relative to where its DLL resides.
|
---|
429 | if (startsWithLettersIgnoringASCIICase(urlString, "file:///"_s) && urlString.endsWithIgnoringASCIICase("/etc/catalog"_s))
|
---|
430 | return false;
|
---|
431 |
|
---|
432 | // The most common DTD. There isn't much point in hammering www.w3c.org by requesting this for every XHTML document.
|
---|
433 | if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/tr/xhtml"_s))
|
---|
434 | return false;
|
---|
435 |
|
---|
436 | // Similarly, there isn't much point in requesting the SVG DTD.
|
---|
437 | if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/graphics/svg"_s))
|
---|
438 | return false;
|
---|
439 |
|
---|
440 | // The libxml doesn't give us a lot of context for deciding whether to
|
---|
441 | // allow this request. In the worst case, this load could be for an
|
---|
442 | // external entity and the resulting document could simply read the
|
---|
443 | // retrieved content. If we had more context, we could potentially allow
|
---|
444 | // the parser to load a DTD. As things stand, we take the conservative
|
---|
445 | // route and allow same-origin requests only.
|
---|
446 | if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin().canRequest(url)) {
|
---|
447 | XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
|
---|
448 | return false;
|
---|
449 | }
|
---|
450 |
|
---|
451 | return true;
|
---|
452 | }
|
---|
453 |
|
---|
454 | static void* openFunc(const char* uri)
|
---|
455 | {
|
---|
456 | ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
|
---|
457 | ASSERT(libxmlLoaderThread == &Thread::current());
|
---|
458 |
|
---|
459 | CachedResourceLoader& cachedResourceLoader = *XMLDocumentParserScope::currentCachedResourceLoader;
|
---|
460 | Document* document = cachedResourceLoader.document();
|
---|
461 | // Same logic as HTMLBaseElement::href(). Keep them in sync.
|
---|
462 | auto* encoding = (document && document->decoder()) ? document->decoder()->encodingForURLParsing() : nullptr;
|
---|
463 | URL url(document ? document->fallbackBaseURL() : URL(), stripLeadingAndTrailingHTMLSpaces(String::fromLatin1(uri)), encoding);
|
---|
464 |
|
---|
465 | if (!shouldAllowExternalLoad(url))
|
---|
466 | return &globalDescriptor;
|
---|
467 |
|
---|
468 | ResourceResponse response;
|
---|
469 | RefPtr<SharedBuffer> data;
|
---|
470 |
|
---|
471 | {
|
---|
472 | ResourceError error;
|
---|
473 | XMLDocumentParserScope scope(nullptr);
|
---|
474 | // FIXME: We should restore the original global error handler as well.
|
---|
475 |
|
---|
476 | if (cachedResourceLoader.frame()) {
|
---|
477 | FetchOptions options;
|
---|
478 | options.mode = FetchOptions::Mode::SameOrigin;
|
---|
479 | options.credentials = FetchOptions::Credentials::Include;
|
---|
480 | cachedResourceLoader.frame()->loader().loadResourceSynchronously(url, ClientCredentialPolicy::MayAskClientForCredentials, options, { }, error, response, data);
|
---|
481 |
|
---|
482 | if (response.url().isEmpty()) {
|
---|
483 | if (Page* page = document ? document->page() : nullptr)
|
---|
484 | page->console().addMessage(MessageSource::Security, MessageLevel::Error, makeString("Did not parse external entity resource at '", url.stringCenterEllipsizedToLength(), "' because cross-origin loads are not allowed."));
|
---|
485 | return &globalDescriptor;
|
---|
486 | }
|
---|
487 | if (!externalEntityMimeTypeAllowed(response)) {
|
---|
488 | if (Page* page = document ? document->page() : nullptr)
|
---|
489 | page->console().addMessage(MessageSource::Security, MessageLevel::Error, makeString("Did not parse external entity resource at '", url.stringCenterEllipsizedToLength(), "' because only XML MIME types are allowed."));
|
---|
490 | return &globalDescriptor;
|
---|
491 | }
|
---|
492 | }
|
---|
493 | }
|
---|
494 |
|
---|
495 | if (!data)
|
---|
496 | return &globalDescriptor;
|
---|
497 |
|
---|
498 | return new OffsetBuffer({ data->data(), data->size() });
|
---|
499 | }
|
---|
500 |
|
---|
501 | static int readFunc(void* context, char* buffer, int len)
|
---|
502 | {
|
---|
503 | // Do 0-byte reads in case of a null descriptor
|
---|
504 | if (context == &globalDescriptor)
|
---|
505 | return 0;
|
---|
506 |
|
---|
507 | OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
|
---|
508 | return data->readOutBytes(buffer, len);
|
---|
509 | }
|
---|
510 |
|
---|
// libxml2 I/O "write" callback. All arguments are ignored and nothing is
// written; the reported byte count is always 0.
static int writeFunc(void*, const char*, int)
{
    constexpr int bytesWritten = 0;
    return bytesWritten;
}
|
---|
516 |
|
---|
517 | static int closeFunc(void* context)
|
---|
518 | {
|
---|
519 | if (context != &globalDescriptor) {
|
---|
520 | OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
|
---|
521 | delete data;
|
---|
522 | }
|
---|
523 | return 0;
|
---|
524 | }
|
---|
525 |
|
---|
526 | #if ENABLE(XSLT)
|
---|
// printf-style error callback that deliberately discards the message.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
|
---|
531 | #endif
|
---|
532 |
|
---|
533 | static void initializeXMLParser()
|
---|
534 | {
|
---|
535 | static std::once_flag flag;
|
---|
536 | std::call_once(flag, [&] {
|
---|
537 | xmlInitParser();
|
---|
538 | xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
|
---|
539 | xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
|
---|
540 | libxmlLoaderThread = &Thread::current();
|
---|
541 | });
|
---|
542 | }
|
---|
543 |
|
---|
544 | Ref<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
|
---|
545 | {
|
---|
546 | initializeXMLParser();
|
---|
547 |
|
---|
548 | xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
|
---|
549 | parser->_private = userData;
|
---|
550 |
|
---|
551 | // Substitute entities.
|
---|
552 | xmlCtxtUseOptions(parser, XML_PARSE_NOENT | XML_PARSE_HUGE);
|
---|
553 |
|
---|
554 | switchToUTF16(parser);
|
---|
555 |
|
---|
556 | return adoptRef(*new XMLParserContext(parser));
|
---|
557 | }
|
---|
558 |
|
---|
559 |
|
---|
560 | // Chunk should be encoded in UTF-8
|
---|
561 | RefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
|
---|
562 | {
|
---|
563 | initializeXMLParser();
|
---|
564 |
|
---|
565 | // appendFragmentSource() checks that the length doesn't overflow an int.
|
---|
566 | xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
|
---|
567 |
|
---|
568 | if (!parser)
|
---|
569 | return nullptr;
|
---|
570 |
|
---|
571 | memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
|
---|
572 |
|
---|
573 | // Substitute entities.
|
---|
574 | // FIXME: Why is XML_PARSE_NODICT needed? This is different from what createStringParser does.
|
---|
575 | xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT | XML_PARSE_HUGE);
|
---|
576 |
|
---|
577 | // Internal initialization
|
---|
578 | parser->sax2 = 1;
|
---|
579 | parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
|
---|
580 | parser->depth = 0;
|
---|
581 | parser->str_xml = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xml")), 3);
|
---|
582 | parser->str_xmlns = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xmlns")), 5);
|
---|
583 | parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
|
---|
584 | parser->_private = userData;
|
---|
585 |
|
---|
586 | return adoptRef(*new XMLParserContext(parser));
|
---|
587 | }
|
---|
588 |
|
---|
589 | // --------------------------------
|
---|
590 |
|
---|
591 | bool XMLDocumentParser::supportsXMLVersion(const String& version)
|
---|
592 | {
|
---|
593 | return version == "1.0"_s;
|
---|
594 | }
|
---|
595 |
|
---|
596 | XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
|
---|
597 | : ScriptableDocumentParser(document)
|
---|
598 | , m_view(frameView)
|
---|
599 | , m_pendingCallbacks(makeUnique<PendingCallbacks>())
|
---|
600 | , m_currentNode(&document)
|
---|
601 | , m_scriptStartPosition(TextPosition::belowRangePosition())
|
---|
602 | {
|
---|
603 | }
|
---|
604 |
|
---|
605 | XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, HashMap<AtomString, AtomString>&& prefixToNamespaceMap, const AtomString& defaultNamespaceURI, ParserContentPolicy parserContentPolicy)
|
---|
606 | : ScriptableDocumentParser(fragment.document(), parserContentPolicy)
|
---|
607 | , m_pendingCallbacks(makeUnique<PendingCallbacks>())
|
---|
608 | , m_currentNode(&fragment)
|
---|
609 | , m_scriptStartPosition(TextPosition::belowRangePosition())
|
---|
610 | , m_parsingFragment(true)
|
---|
611 | , m_prefixToNamespaceMap(WTFMove(prefixToNamespaceMap))
|
---|
612 | , m_defaultNamespaceURI(defaultNamespaceURI)
|
---|
613 | {
|
---|
614 | fragment.ref();
|
---|
615 | }
|
---|
616 |
|
---|
617 | XMLParserContext::~XMLParserContext()
|
---|
618 | {
|
---|
619 | if (m_context->myDoc)
|
---|
620 | xmlFreeDoc(m_context->myDoc);
|
---|
621 | xmlFreeParserCtxt(m_context);
|
---|
622 | }
|
---|
623 |
|
---|
624 | XMLDocumentParser::~XMLDocumentParser()
|
---|
625 | {
|
---|
626 | // The XMLDocumentParser will always be detached before being destroyed.
|
---|
627 | ASSERT(m_currentNodeStack.isEmpty());
|
---|
628 | ASSERT(!m_currentNode);
|
---|
629 |
|
---|
630 | // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
|
---|
631 | if (m_pendingScript)
|
---|
632 | m_pendingScript->clearClient();
|
---|
633 | }
|
---|
634 |
|
---|
635 | void XMLDocumentParser::doWrite(const String& parseString)
|
---|
636 | {
|
---|
637 | ASSERT(!isDetached());
|
---|
638 | if (!m_context)
|
---|
639 | initializeParserContext();
|
---|
640 |
|
---|
641 | // Protect the libxml context from deletion during a callback
|
---|
642 | RefPtr<XMLParserContext> context = m_context;
|
---|
643 |
|
---|
644 | // libXML throws an error if you try to switch the encoding for an empty string.
|
---|
645 | if (parseString.length()) {
|
---|
646 | // JavaScript may cause the parser to detach during xmlParseChunk
|
---|
647 | // keep this alive until this function is done.
|
---|
648 | Ref<XMLDocumentParser> protectedThis(*this);
|
---|
649 |
|
---|
650 | XMLDocumentParserScope scope(&document()->cachedResourceLoader());
|
---|
651 |
|
---|
652 | // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
|
---|
653 | switchToUTF16(context->context());
|
---|
654 | xmlParseChunk(context->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), sizeof(UChar) * parseString.length(), 0);
|
---|
655 |
|
---|
656 | // JavaScript (which may be run under the xmlParseChunk callstack) may
|
---|
657 | // cause the parser to be stopped or detached.
|
---|
658 | if (isStopped())
|
---|
659 | return;
|
---|
660 | }
|
---|
661 |
|
---|
662 | // FIXME: Why is this here? And why is it after we process the passed source?
|
---|
663 | if (document()->decoder() && document()->decoder()->sawError()) {
|
---|
664 | // If the decoder saw an error, report it as fatal (stops parsing)
|
---|
665 | TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
|
---|
666 | handleError(XMLErrors::fatal, "Encoding error", position);
|
---|
667 | }
|
---|
668 | }
|
---|
669 |
|
---|
670 | static inline String toString(const xmlChar* string, size_t size)
|
---|
671 | {
|
---|
672 | return String::fromUTF8(reinterpret_cast<const char*>(string), size);
|
---|
673 | }
|
---|
674 |
|
---|
675 | static inline String toString(const xmlChar* string)
|
---|
676 | {
|
---|
677 | return String::fromUTF8(reinterpret_cast<const char*>(string));
|
---|
678 | }
|
---|
679 |
|
---|
680 | static inline AtomString toAtomString(const xmlChar* string, size_t size)
|
---|
681 | {
|
---|
682 | return AtomString::fromUTF8(reinterpret_cast<const char*>(string), size);
|
---|
683 | }
|
---|
684 |
|
---|
685 | static inline AtomString toAtomString(const xmlChar* string)
|
---|
686 | {
|
---|
687 | return AtomString::fromUTF8(reinterpret_cast<const char*>(string));
|
---|
688 | }
|
---|
689 |
|
---|
690 | struct _xmlSAX2Namespace {
|
---|
691 | const xmlChar* prefix;
|
---|
692 | const xmlChar* uri;
|
---|
693 | };
|
---|
694 | typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
|
---|
695 |
|
---|
696 | static inline bool handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int numNamespaces)
|
---|
697 | {
|
---|
698 | xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
|
---|
699 | for (int i = 0; i < numNamespaces; i++) {
|
---|
700 | AtomString namespaceQName = xmlnsAtom();
|
---|
701 | AtomString namespaceURI = toAtomString(namespaces[i].uri);
|
---|
702 | if (namespaces[i].prefix)
|
---|
703 | namespaceQName = makeAtomString("xmlns:", toString(namespaces[i].prefix));
|
---|
704 |
|
---|
705 | auto result = Element::parseAttributeName(XMLNSNames::xmlnsNamespaceURI, namespaceQName);
|
---|
706 | if (result.hasException())
|
---|
707 | return false;
|
---|
708 |
|
---|
709 | prefixedAttributes.append(Attribute(result.releaseReturnValue(), namespaceURI));
|
---|
710 | }
|
---|
711 | return true;
|
---|
712 | }
|
---|
713 |
|
---|
714 | struct _xmlSAX2Attributes {
|
---|
715 | const xmlChar* localname;
|
---|
716 | const xmlChar* prefix;
|
---|
717 | const xmlChar* uri;
|
---|
718 | const xmlChar* value;
|
---|
719 | const xmlChar* end;
|
---|
720 | };
|
---|
721 | typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
|
---|
722 |
|
---|
723 | static inline bool handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int numAttributes)
|
---|
724 | {
|
---|
725 | xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
|
---|
726 | for (int i = 0; i < numAttributes; i++) {
|
---|
727 | int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
|
---|
728 | AtomString attrValue = toAtomString(attributes[i].value, valueLength);
|
---|
729 | String attrPrefix = toString(attributes[i].prefix);
|
---|
730 | AtomString attrURI = attrPrefix.isEmpty() ? nullAtom() : toAtomString(attributes[i].uri);
|
---|
731 | AtomString attrQName = attrPrefix.isEmpty() ? toAtomString(attributes[i].localname) : makeAtomString(attrPrefix, ':', toString(attributes[i].localname));
|
---|
732 |
|
---|
733 | auto result = Element::parseAttributeName(attrURI, attrQName);
|
---|
734 | if (result.hasException())
|
---|
735 | return false;
|
---|
736 |
|
---|
737 | prefixedAttributes.append(Attribute(result.releaseReturnValue(), attrValue));
|
---|
738 | }
|
---|
739 | return true;
|
---|
740 | }
|
---|
741 |
|
---|
// Tells whether the work-around for https://bugzilla.gnome.org/show_bug.cgi?id=502960
// has to be applied. With that bug, libxml doesn't include the namespace for parsed
// entities, which breaks entity expansion for all entities containing elements.
static inline bool hackAroundLibXMLEntityParsingBug()
{
#if LIBXML_VERSION < 20704
    return true;
#else
    // libxml 2.7.4 and later contain the fix.
    return false;
#endif
}
754 |
|
---|
755 | void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** libxmlNamespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes)
|
---|
756 | {
|
---|
757 | if (isStopped())
|
---|
758 | return;
|
---|
759 |
|
---|
760 | if (m_parserPaused) {
|
---|
761 | m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, libxmlNamespaces, numAttributes, numDefaulted, libxmlAttributes);
|
---|
762 | return;
|
---|
763 | }
|
---|
764 |
|
---|
765 | if (!updateLeafTextNode())
|
---|
766 | return;
|
---|
767 |
|
---|
768 | AtomString localName = toAtomString(xmlLocalName);
|
---|
769 | AtomString uri = toAtomString(xmlURI);
|
---|
770 | AtomString prefix = toAtomString(xmlPrefix);
|
---|
771 |
|
---|
772 | if (m_parsingFragment && uri.isNull()) {
|
---|
773 | if (!prefix.isNull())
|
---|
774 | uri = m_prefixToNamespaceMap.get(prefix);
|
---|
775 | else if (is<SVGElement>(m_currentNode) || localName == SVGNames::svgTag->localName())
|
---|
776 | uri = SVGNames::svgNamespaceURI;
|
---|
777 | else
|
---|
778 | uri = m_defaultNamespaceURI;
|
---|
779 | }
|
---|
780 |
|
---|
781 | // If libxml entity parsing is broken, transfer the currentNodes' namespaceURI to the new node,
|
---|
782 | // if we're currently expanding elements which originate from an entity declaration.
|
---|
783 | if (hackAroundLibXMLEntityParsingBug() && depthTriggeringEntityExpansion() != -1 && context()->depth > depthTriggeringEntityExpansion() && uri.isNull() && prefix.isNull())
|
---|
784 | uri = m_currentNode->namespaceURI();
|
---|
785 |
|
---|
786 | bool isFirstElement = !m_sawFirstElement;
|
---|
787 | m_sawFirstElement = true;
|
---|
788 |
|
---|
789 | QualifiedName qName(prefix, localName, uri);
|
---|
790 | auto newElement = m_currentNode->document().createElement(qName, true);
|
---|
791 |
|
---|
792 | Vector<Attribute> prefixedAttributes;
|
---|
793 | if (!handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, numNamespaces)) {
|
---|
794 | setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
|
---|
795 | stopParsing();
|
---|
796 | return;
|
---|
797 | }
|
---|
798 |
|
---|
799 | bool success = handleElementAttributes(prefixedAttributes, libxmlAttributes, numAttributes);
|
---|
800 | setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
|
---|
801 | if (!success) {
|
---|
802 | stopParsing();
|
---|
803 | return;
|
---|
804 | }
|
---|
805 |
|
---|
806 | newElement->beginParsingChildren();
|
---|
807 |
|
---|
808 | if (isScriptElement(newElement.get()))
|
---|
809 | m_scriptStartPosition = textPosition();
|
---|
810 |
|
---|
811 | m_currentNode->parserAppendChild(newElement);
|
---|
812 | if (!m_currentNode) // Synchronous DOM events may have removed the current node.
|
---|
813 | return;
|
---|
814 |
|
---|
815 | if (is<HTMLTemplateElement>(newElement))
|
---|
816 | pushCurrentNode(&downcast<HTMLTemplateElement>(newElement.get()).content());
|
---|
817 | else
|
---|
818 | pushCurrentNode(newElement.ptr());
|
---|
819 |
|
---|
820 | if (is<HTMLHtmlElement>(newElement))
|
---|
821 | downcast<HTMLHtmlElement>(newElement.get()).insertedByParser();
|
---|
822 |
|
---|
823 | if (!m_parsingFragment && isFirstElement && document()->frame())
|
---|
824 | document()->frame()->injectUserScripts(UserScriptInjectionTime::DocumentStart);
|
---|
825 | }
|
---|
826 |
|
---|
827 | void XMLDocumentParser::endElementNs()
|
---|
828 | {
|
---|
829 | if (isStopped())
|
---|
830 | return;
|
---|
831 |
|
---|
832 | if (m_parserPaused) {
|
---|
833 | m_pendingCallbacks->appendEndElementNSCallback();
|
---|
834 | return;
|
---|
835 | }
|
---|
836 |
|
---|
837 | // JavaScript can detach the parser. Make sure this is not released
|
---|
838 | // before the end of this method.
|
---|
839 | Ref<XMLDocumentParser> protectedThis(*this);
|
---|
840 |
|
---|
841 | if (!updateLeafTextNode())
|
---|
842 | return;
|
---|
843 |
|
---|
844 | RefPtr<ContainerNode> node = m_currentNode;
|
---|
845 | node->finishParsingChildren();
|
---|
846 |
|
---|
847 | // Once we reach the depth again where entity expansion started, stop executing the work-around.
|
---|
848 | if (hackAroundLibXMLEntityParsingBug() && context()->depth <= depthTriggeringEntityExpansion())
|
---|
849 | setDepthTriggeringEntityExpansion(-1);
|
---|
850 |
|
---|
851 | if (!scriptingContentIsAllowed(parserContentPolicy()) && is<Element>(*node) && isScriptElement(downcast<Element>(*node))) {
|
---|
852 | popCurrentNode();
|
---|
853 | node->remove();
|
---|
854 | return;
|
---|
855 | }
|
---|
856 |
|
---|
857 | if (!node->isElementNode() || !m_view) {
|
---|
858 | popCurrentNode();
|
---|
859 | return;
|
---|
860 | }
|
---|
861 |
|
---|
862 | auto& element = downcast<Element>(*node);
|
---|
863 |
|
---|
864 | // The element's parent may have already been removed from document.
|
---|
865 | // Parsing continues in this case, but scripts aren't executed.
|
---|
866 | if (!element.isConnected()) {
|
---|
867 | popCurrentNode();
|
---|
868 | return;
|
---|
869 | }
|
---|
870 |
|
---|
871 | if (!isScriptElement(element)) {
|
---|
872 | popCurrentNode();
|
---|
873 | return;
|
---|
874 | }
|
---|
875 |
|
---|
876 | // Don't load external scripts for standalone documents (for now).
|
---|
877 | ASSERT(!m_pendingScript);
|
---|
878 | m_requestingScript = true;
|
---|
879 |
|
---|
880 | auto& scriptElement = downcastScriptElement(element);
|
---|
881 | if (scriptElement.prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) {
|
---|
882 | // FIXME: Script execution should be shared between
|
---|
883 | // the libxml2 and Qt XMLDocumentParser implementations.
|
---|
884 |
|
---|
885 | if (scriptElement.readyToBeParserExecuted())
|
---|
886 | scriptElement.executeClassicScript(ScriptSourceCode(scriptElement.scriptContent(), URL(document()->url()), m_scriptStartPosition, JSC::SourceProviderSourceType::Program, InlineClassicScript::create(scriptElement)));
|
---|
887 | else if (scriptElement.willBeParserExecuted() && scriptElement.loadableScript()) {
|
---|
888 | m_pendingScript = PendingScript::create(scriptElement, *scriptElement.loadableScript());
|
---|
889 | m_pendingScript->setClient(*this);
|
---|
890 |
|
---|
891 | // m_pendingScript will be nullptr if script was already loaded and setClient() executed it.
|
---|
892 | if (m_pendingScript)
|
---|
893 | pauseParsing();
|
---|
894 | }
|
---|
895 |
|
---|
896 | // JavaScript may have detached the parser
|
---|
897 | if (isDetached())
|
---|
898 | return;
|
---|
899 | }
|
---|
900 | m_requestingScript = false;
|
---|
901 | popCurrentNode();
|
---|
902 | }
|
---|
903 |
|
---|
904 | void XMLDocumentParser::characters(const xmlChar* characters, int length)
|
---|
905 | {
|
---|
906 | if (isStopped())
|
---|
907 | return;
|
---|
908 |
|
---|
909 | if (m_parserPaused) {
|
---|
910 | m_pendingCallbacks->appendCharactersCallback(characters, length);
|
---|
911 | return;
|
---|
912 | }
|
---|
913 |
|
---|
914 | if (!m_leafTextNode)
|
---|
915 | createLeafTextNode();
|
---|
916 | m_bufferedText.append(characters, length);
|
---|
917 | }
|
---|
918 |
|
---|
919 | void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
|
---|
920 | {
|
---|
921 | if (isStopped())
|
---|
922 | return;
|
---|
923 |
|
---|
924 | va_list preflightArgs;
|
---|
925 | va_copy(preflightArgs, args);
|
---|
926 | size_t stringLength = vsnprintf(nullptr, 0, message, preflightArgs);
|
---|
927 | va_end(preflightArgs);
|
---|
928 |
|
---|
929 | Vector<char, 1024> buffer(stringLength + 1);
|
---|
930 | vsnprintf(buffer.data(), stringLength + 1, message, args);
|
---|
931 |
|
---|
932 | TextPosition position = textPosition();
|
---|
933 | if (m_parserPaused)
|
---|
934 | m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(buffer.data()), position.m_line, position.m_column);
|
---|
935 | else
|
---|
936 | handleError(type, buffer.data(), textPosition());
|
---|
937 | }
|
---|
938 |
|
---|
939 | void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
|
---|
940 | {
|
---|
941 | if (isStopped())
|
---|
942 | return;
|
---|
943 |
|
---|
944 | if (m_parserPaused) {
|
---|
945 | m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
|
---|
946 | return;
|
---|
947 | }
|
---|
948 |
|
---|
949 | if (!updateLeafTextNode())
|
---|
950 | return;
|
---|
951 |
|
---|
952 | auto result = m_currentNode->document().createProcessingInstruction(toString(target), toString(data));
|
---|
953 | if (result.hasException())
|
---|
954 | return;
|
---|
955 | auto pi = result.releaseReturnValue();
|
---|
956 |
|
---|
957 | pi->setCreatedByParser(true);
|
---|
958 |
|
---|
959 | m_currentNode->parserAppendChild(pi);
|
---|
960 |
|
---|
961 | pi->finishParsingChildren();
|
---|
962 |
|
---|
963 | if (pi->isCSS())
|
---|
964 | m_sawCSS = true;
|
---|
965 |
|
---|
966 | #if ENABLE(XSLT)
|
---|
967 | m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
|
---|
968 | if (m_sawXSLTransform && !document()->transformSourceDocument())
|
---|
969 | stopParsing();
|
---|
970 | #endif
|
---|
971 | }
|
---|
972 |
|
---|
973 | void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
|
---|
974 | {
|
---|
975 | if (isStopped())
|
---|
976 | return;
|
---|
977 |
|
---|
978 | if (m_parserPaused) {
|
---|
979 | m_pendingCallbacks->appendCDATABlockCallback(s, len);
|
---|
980 | return;
|
---|
981 | }
|
---|
982 |
|
---|
983 | if (!updateLeafTextNode())
|
---|
984 | return;
|
---|
985 |
|
---|
986 | m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), toString(s, len)));
|
---|
987 | }
|
---|
988 |
|
---|
989 | void XMLDocumentParser::comment(const xmlChar* s)
|
---|
990 | {
|
---|
991 | if (isStopped())
|
---|
992 | return;
|
---|
993 |
|
---|
994 | if (m_parserPaused) {
|
---|
995 | m_pendingCallbacks->appendCommentCallback(s);
|
---|
996 | return;
|
---|
997 | }
|
---|
998 |
|
---|
999 | if (!updateLeafTextNode())
|
---|
1000 | return;
|
---|
1001 |
|
---|
1002 | m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), toString(s)));
|
---|
1003 | }
|
---|
1004 |
|
---|
// Interpretation of the integer `standalone` value that startDocument() receives.
// The numeric values are significant because the integer is cast to this type.
enum StandaloneInfo {
    StandaloneUnspecified = -2,
    NoXMlDeclaration = -1,
    StandaloneNo = 0,
    StandaloneYes = 1
};
1011 |
|
---|
1012 | void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
|
---|
1013 | {
|
---|
1014 | StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
|
---|
1015 | if (standaloneInfo == NoXMlDeclaration) {
|
---|
1016 | document()->setHasXMLDeclaration(false);
|
---|
1017 | return;
|
---|
1018 | }
|
---|
1019 |
|
---|
1020 | if (version)
|
---|
1021 | document()->setXMLVersion(toString(version));
|
---|
1022 | if (standalone != StandaloneUnspecified)
|
---|
1023 | document()->setXMLStandalone(standaloneInfo == StandaloneYes);
|
---|
1024 | if (encoding)
|
---|
1025 | document()->setXMLEncoding(toString(encoding));
|
---|
1026 | document()->setHasXMLDeclaration(true);
|
---|
1027 | }
|
---|
1028 |
|
---|
1029 | void XMLDocumentParser::endDocument()
|
---|
1030 | {
|
---|
1031 | updateLeafTextNode();
|
---|
1032 | }
|
---|
1033 |
|
---|
1034 | void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
|
---|
1035 | {
|
---|
1036 | if (isStopped())
|
---|
1037 | return;
|
---|
1038 |
|
---|
1039 | if (m_parserPaused) {
|
---|
1040 | m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
|
---|
1041 | return;
|
---|
1042 | }
|
---|
1043 |
|
---|
1044 | if (document())
|
---|
1045 | document()->parserAppendChild(DocumentType::create(*document(), toString(name), toString(externalID), toString(systemID)));
|
---|
1046 | }
|
---|
1047 |
|
---|
1048 | static inline XMLDocumentParser* getParser(void* closure)
|
---|
1049 | {
|
---|
1050 | xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
|
---|
1051 | return static_cast<XMLDocumentParser*>(ctxt->_private);
|
---|
1052 | }
|
---|
1053 |
|
---|
1054 | // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
|
---|
1055 | // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
|
---|
1056 | static inline bool hackAroundLibXMLEntityBug(void* closure)
|
---|
1057 | {
|
---|
1058 | #if LIBXML_VERSION >= 20627
|
---|
1059 | // This bug has been fixed in libxml 2.6.27.
|
---|
1060 | UNUSED_PARAM(closure);
|
---|
1061 | return false;
|
---|
1062 | #else
|
---|
1063 | return static_cast<xmlParserCtxtPtr>(closure)->node;
|
---|
1064 | #endif
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 | static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes)
|
---|
1068 | {
|
---|
1069 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1070 | return;
|
---|
1071 |
|
---|
1072 | getParser(closure)->startElementNs(localname, prefix, uri, numNamespaces, namespaces, numAttributes, numDefaulted, libxmlAttributes);
|
---|
1073 | }
|
---|
1074 |
|
---|
1075 | static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
|
---|
1076 | {
|
---|
1077 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1078 | return;
|
---|
1079 |
|
---|
1080 | getParser(closure)->endElementNs();
|
---|
1081 | }
|
---|
1082 |
|
---|
1083 | static void charactersHandler(void* closure, const xmlChar* s, int len)
|
---|
1084 | {
|
---|
1085 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1086 | return;
|
---|
1087 |
|
---|
1088 | getParser(closure)->characters(s, len);
|
---|
1089 | }
|
---|
1090 |
|
---|
1091 | static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
|
---|
1092 | {
|
---|
1093 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1094 | return;
|
---|
1095 |
|
---|
1096 | getParser(closure)->processingInstruction(target, data);
|
---|
1097 | }
|
---|
1098 |
|
---|
1099 | static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
|
---|
1100 | {
|
---|
1101 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1102 | return;
|
---|
1103 |
|
---|
1104 | getParser(closure)->cdataBlock(s, len);
|
---|
1105 | }
|
---|
1106 |
|
---|
1107 | static void commentHandler(void* closure, const xmlChar* comment)
|
---|
1108 | {
|
---|
1109 | if (hackAroundLibXMLEntityBug(closure))
|
---|
1110 | return;
|
---|
1111 |
|
---|
1112 | getParser(closure)->comment(comment);
|
---|
1113 | }
|
---|
1114 |
|
---|
1115 | WTF_ATTRIBUTE_PRINTF(2, 3)
|
---|
1116 | static void warningHandler(void* closure, const char* message, ...)
|
---|
1117 | {
|
---|
1118 | va_list args;
|
---|
1119 | va_start(args, message);
|
---|
1120 | getParser(closure)->error(XMLErrors::warning, message, args);
|
---|
1121 | va_end(args);
|
---|
1122 | }
|
---|
1123 |
|
---|
1124 | WTF_ATTRIBUTE_PRINTF(2, 3)
|
---|
1125 | static void fatalErrorHandler(void* closure, const char* message, ...)
|
---|
1126 | {
|
---|
1127 | va_list args;
|
---|
1128 | va_start(args, message);
|
---|
1129 | getParser(closure)->error(XMLErrors::fatal, message, args);
|
---|
1130 | va_end(args);
|
---|
1131 | }
|
---|
1132 |
|
---|
1133 | WTF_ATTRIBUTE_PRINTF(2, 3)
|
---|
1134 | static void normalErrorHandler(void* closure, const char* message, ...)
|
---|
1135 | {
|
---|
1136 | va_list args;
|
---|
1137 | va_start(args, message);
|
---|
1138 | getParser(closure)->error(XMLErrors::nonFatal, message, args);
|
---|
1139 | va_end(args);
|
---|
1140 | }
|
---|
1141 |
|
---|
1142 | // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
|
---|
1143 | // a hack to avoid malloc/free. Using a global variable like this could cause trouble
|
---|
1144 | // if libxml implementation details were to change
|
---|
1145 | static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
---|
1146 |
|
---|
1147 | static xmlEntityPtr sharedXHTMLEntity()
|
---|
1148 | {
|
---|
1149 | static xmlEntity entity;
|
---|
1150 | if (!entity.type) {
|
---|
1151 | entity.type = XML_ENTITY_DECL;
|
---|
1152 | entity.orig = sharedXHTMLEntityResult;
|
---|
1153 | entity.content = sharedXHTMLEntityResult;
|
---|
1154 | entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
|
---|
1155 | }
|
---|
1156 | return &entity;
|
---|
1157 | }
|
---|
1158 |
|
---|
1159 | static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
|
---|
1160 | {
|
---|
1161 | const char* originalTarget = target;
|
---|
1162 | auto conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, utf16Entity + numberOfCodeUnits, &target, target + targetSize);
|
---|
1163 | if (conversionResult != WTF::Unicode::ConversionOK)
|
---|
1164 | return 0;
|
---|
1165 |
|
---|
1166 | // Even though we must pass the length, libxml expects the entity string to be null terminated.
|
---|
1167 | ASSERT(target >= originalTarget + 1);
|
---|
1168 | *target = '\0';
|
---|
1169 | return target - originalTarget;
|
---|
1170 | }
|
---|
1171 |
|
---|
1172 | static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
|
---|
1173 | {
|
---|
1174 | UChar utf16DecodedEntity[4];
|
---|
1175 | size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
|
---|
1176 | if (!numberOfCodeUnits)
|
---|
1177 | return 0;
|
---|
1178 |
|
---|
1179 | ASSERT(numberOfCodeUnits <= 4);
|
---|
1180 | size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
|
---|
1181 | reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
|
---|
1182 | if (!entityLengthInUTF8)
|
---|
1183 | return 0;
|
---|
1184 |
|
---|
1185 | xmlEntityPtr entity = sharedXHTMLEntity();
|
---|
1186 | entity->length = entityLengthInUTF8;
|
---|
1187 | entity->name = name;
|
---|
1188 | return entity;
|
---|
1189 | }
|
---|
1190 |
|
---|
1191 | static void entityDeclarationHandler(void* closure, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
|
---|
1192 | {
|
---|
1193 | // Prevent the next call to getEntityHandler() to record the entity expansion depth.
|
---|
1194 | // We're parsing the entity declaration, so there's no need to record anything.
|
---|
1195 | // We only need to record the depth, if we're actually expanding the entity, when it's referenced.
|
---|
1196 | if (hackAroundLibXMLEntityParsingBug())
|
---|
1197 | getParser(closure)->setIsParsingEntityDeclaration(true);
|
---|
1198 | xmlSAX2EntityDecl(closure, name, type, publicId, systemId, content);
|
---|
1199 | }
|
---|
1200 |
|
---|
1201 | static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
|
---|
1202 | {
|
---|
1203 | xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
|
---|
1204 |
|
---|
1205 | XMLDocumentParser* parser = getParser(closure);
|
---|
1206 | if (hackAroundLibXMLEntityParsingBug()) {
|
---|
1207 | if (parser->isParsingEntityDeclaration()) {
|
---|
1208 | // We're parsing the entity declarations (not an entity reference), no need to do anything special.
|
---|
1209 | parser->setIsParsingEntityDeclaration(false);
|
---|
1210 | ASSERT(parser->depthTriggeringEntityExpansion() == -1);
|
---|
1211 | } else {
|
---|
1212 | // The entity will be used and eventually expanded. Record the current parser depth
|
---|
1213 | // so the next call to startElementNs() knows that the new element originates from
|
---|
1214 | // an entity declaration.
|
---|
1215 | parser->setDepthTriggeringEntityExpansion(ctxt->depth);
|
---|
1216 | }
|
---|
1217 | }
|
---|
1218 |
|
---|
1219 | xmlEntityPtr ent = xmlGetPredefinedEntity(name);
|
---|
1220 | if (ent) {
|
---|
1221 | ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
|
---|
1222 | return ent;
|
---|
1223 | }
|
---|
1224 |
|
---|
1225 | ent = xmlGetDocEntity(ctxt->myDoc, name);
|
---|
1226 | if (!ent && parser->isXHTMLDocument()) {
|
---|
1227 | ent = getXHTMLEntity(name);
|
---|
1228 | if (ent)
|
---|
1229 | ent->etype = XML_INTERNAL_GENERAL_ENTITY;
|
---|
1230 | }
|
---|
1231 |
|
---|
1232 | return ent;
|
---|
1233 | }
|
---|
1234 |
|
---|
1235 | static void startDocumentHandler(void* closure)
|
---|
1236 | {
|
---|
1237 | xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
|
---|
1238 | switchToUTF16(ctxt);
|
---|
1239 | getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
|
---|
1240 | xmlSAX2StartDocument(closure);
|
---|
1241 | }
|
---|
1242 |
|
---|
1243 | static void endDocumentHandler(void* closure)
|
---|
1244 | {
|
---|
1245 | getParser(closure)->endDocument();
|
---|
1246 | xmlSAX2EndDocument(closure);
|
---|
1247 | }
|
---|
1248 |
|
---|
1249 | static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
|
---|
1250 | {
|
---|
1251 | getParser(closure)->internalSubset(name, externalID, systemID);
|
---|
1252 | xmlSAX2InternalSubset(closure, name, externalID, systemID);
|
---|
1253 | }
|
---|
1254 |
|
---|
1255 | static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
|
---|
1256 | {
|
---|
1257 | String extId = toString(externalId);
|
---|
1258 | if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"_s)
|
---|
1259 | || (extId == "-//W3C//DTD XHTML 1.1//EN"_s)
|
---|
1260 | || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN"_s)
|
---|
1261 | || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"_s)
|
---|
1262 | || (extId == "-//W3C//DTD XHTML Basic 1.0//EN"_s)
|
---|
1263 | || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"_s)
|
---|
1264 | || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"_s)
|
---|
1265 | || (extId == "-//W3C//DTD MathML 2.0//EN"_s)
|
---|
1266 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"_s)
|
---|
1267 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"_s)
|
---|
1268 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"_s))
|
---|
1269 | getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
|
---|
1270 | }
|
---|
1271 |
|
---|
1272 | static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
|
---|
1273 | {
|
---|
1274 | // nothing to do, but we need this to work around a crasher
|
---|
1275 | // http://bugzilla.gnome.org/show_bug.cgi?id=172255
|
---|
1276 | // http://bugs.webkit.org/show_bug.cgi?id=5792
|
---|
1277 | }
|
---|
1278 |
|
---|
1279 | void XMLDocumentParser::initializeParserContext(const CString& chunk)
|
---|
1280 | {
|
---|
1281 | xmlSAXHandler sax;
|
---|
1282 | memset(&sax, 0, sizeof(sax));
|
---|
1283 |
|
---|
1284 | sax.error = normalErrorHandler;
|
---|
1285 | sax.fatalError = fatalErrorHandler;
|
---|
1286 | sax.characters = charactersHandler;
|
---|
1287 | sax.processingInstruction = processingInstructionHandler;
|
---|
1288 | sax.cdataBlock = cdataBlockHandler;
|
---|
1289 | sax.comment = commentHandler;
|
---|
1290 | sax.warning = warningHandler;
|
---|
1291 | sax.startElementNs = startElementNsHandler;
|
---|
1292 | sax.endElementNs = endElementNsHandler;
|
---|
1293 | sax.getEntity = getEntityHandler;
|
---|
1294 | sax.startDocument = startDocumentHandler;
|
---|
1295 | sax.endDocument = endDocumentHandler;
|
---|
1296 | sax.internalSubset = internalSubsetHandler;
|
---|
1297 | sax.externalSubset = externalSubsetHandler;
|
---|
1298 | sax.ignorableWhitespace = ignorableWhitespaceHandler;
|
---|
1299 | sax.entityDecl = entityDeclarationHandler;
|
---|
1300 | sax.initialized = XML_SAX2_MAGIC;
|
---|
1301 | DocumentParser::startParsing();
|
---|
1302 | m_sawError = false;
|
---|
1303 | m_sawCSS = false;
|
---|
1304 | m_sawXSLTransform = false;
|
---|
1305 | m_sawFirstElement = false;
|
---|
1306 |
|
---|
1307 | XMLDocumentParserScope scope(&document()->cachedResourceLoader());
|
---|
1308 | if (m_parsingFragment)
|
---|
1309 | m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
|
---|
1310 | else {
|
---|
1311 | ASSERT(!chunk.data());
|
---|
1312 | m_context = XMLParserContext::createStringParser(&sax, this);
|
---|
1313 | }
|
---|
1314 | }
|
---|
1315 |
|
---|
1316 | void XMLDocumentParser::doEnd()
|
---|
1317 | {
|
---|
1318 | if (!isStopped()) {
|
---|
1319 | if (m_context) {
|
---|
1320 | // Tell libxml we're done.
|
---|
1321 | {
|
---|
1322 | XMLDocumentParserScope scope(&document()->cachedResourceLoader());
|
---|
1323 | xmlParseChunk(context(), 0, 0, 1);
|
---|
1324 | }
|
---|
1325 |
|
---|
1326 | m_context = nullptr;
|
---|
1327 | }
|
---|
1328 | }
|
---|
1329 |
|
---|
1330 | #if ENABLE(XSLT)
|
---|
1331 | bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && shouldRenderInXMLTreeViewerMode(*document());
|
---|
1332 | if (xmlViewerMode) {
|
---|
1333 | XMLTreeViewer xmlTreeViewer(*document());
|
---|
1334 | xmlTreeViewer.transformDocumentToTreeView();
|
---|
1335 | } else if (m_sawXSLTransform) {
|
---|
1336 | xmlDocPtr doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform.toString(), document()->url().string());
|
---|
1337 | document()->setTransformSource(makeUnique<TransformSource>(doc));
|
---|
1338 |
|
---|
1339 | document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
|
---|
1340 | document()->applyPendingXSLTransformsNowIfScheduled();
|
---|
1341 |
|
---|
1342 | // styleResolverChanged() call can detach the parser and null out its document.
|
---|
1343 | // In that case, we just bail out.
|
---|
1344 | if (isDetached())
|
---|
1345 | return;
|
---|
1346 |
|
---|
1347 | document()->setParsing(true);
|
---|
1348 | DocumentParser::stopParsing();
|
---|
1349 | }
|
---|
1350 | #endif
|
---|
1351 | }
|
---|
1352 |
|
---|
1353 | #if ENABLE(XSLT)
|
---|
1354 | static inline const char* nativeEndianUTF16Encoding()
|
---|
1355 | {
|
---|
1356 | const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&byteOrderMark);
|
---|
1357 | return BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE";
|
---|
1358 | }
|
---|
1359 |
|
---|
1360 | xmlDocPtr xmlDocPtrForString(CachedResourceLoader& cachedResourceLoader, const String& source, const String& url)
|
---|
1361 | {
|
---|
1362 | if (source.isEmpty())
|
---|
1363 | return nullptr;
|
---|
1364 |
|
---|
1365 | // Parse in a single chunk into an xmlDocPtr
|
---|
1366 | // FIXME: Hook up error handlers so that a failure to parse the main document results in
|
---|
1367 | // good error messages.
|
---|
1368 |
|
---|
1369 | const bool is8Bit = source.is8Bit();
|
---|
1370 | const char* characters = is8Bit ? reinterpret_cast<const char*>(source.characters8()) : reinterpret_cast<const char*>(source.characters16());
|
---|
1371 | size_t sizeInBytes = source.length() * (is8Bit ? sizeof(LChar) : sizeof(UChar));
|
---|
1372 | const char* encoding = is8Bit ? "iso-8859-1" : nativeEndianUTF16Encoding();
|
---|
1373 |
|
---|
1374 | XMLDocumentParserScope scope(&cachedResourceLoader, errorFunc);
|
---|
1375 | return xmlReadMemory(characters, sizeInBytes, url.latin1().data(), encoding, XSLT_PARSE_OPTIONS);
|
---|
1376 | }
|
---|
1377 | #endif
|
---|
1378 |
|
---|
1379 | TextPosition XMLDocumentParser::textPosition() const
|
---|
1380 | {
|
---|
1381 | xmlParserCtxtPtr context = this->context();
|
---|
1382 | if (!context)
|
---|
1383 | return TextPosition();
|
---|
1384 | return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
|
---|
1385 | OrdinalNumber::fromOneBasedInt(context->input->col));
|
---|
1386 | }
|
---|
1387 |
|
---|
1388 | bool XMLDocumentParser::shouldAssociateConsoleMessagesWithTextPosition() const
|
---|
1389 | {
|
---|
1390 | return !m_parserPaused && !m_requestingScript;
|
---|
1391 | }
|
---|
1392 |
|
---|
1393 | void XMLDocumentParser::stopParsing()
|
---|
1394 | {
|
---|
1395 | if (m_sawError)
|
---|
1396 | insertErrorMessageBlock();
|
---|
1397 |
|
---|
1398 | DocumentParser::stopParsing();
|
---|
1399 | if (context())
|
---|
1400 | xmlStopParser(context());
|
---|
1401 | }
|
---|
1402 |
|
---|
1403 | void XMLDocumentParser::resumeParsing()
|
---|
1404 | {
|
---|
1405 | ASSERT(!isDetached());
|
---|
1406 | ASSERT(m_parserPaused);
|
---|
1407 |
|
---|
1408 | m_parserPaused = false;
|
---|
1409 |
|
---|
1410 | // First, execute any pending callbacks
|
---|
1411 | while (!m_pendingCallbacks->isEmpty()) {
|
---|
1412 | m_pendingCallbacks->callAndRemoveFirstCallback(this);
|
---|
1413 |
|
---|
1414 | // A callback paused the parser
|
---|
1415 | if (m_parserPaused)
|
---|
1416 | return;
|
---|
1417 | }
|
---|
1418 |
|
---|
1419 | // There is normally only one string left, so toString() shouldn't copy.
|
---|
1420 | // In any case, the XML parser runs on the main thread and it's OK if
|
---|
1421 | // the passed string has more than one reference.
|
---|
1422 | auto rest = m_pendingSrc.toString();
|
---|
1423 | m_pendingSrc.clear();
|
---|
1424 | append(rest.impl());
|
---|
1425 |
|
---|
1426 | // Finally, if finish() has been called and write() didn't result
|
---|
1427 | // in any further callbacks being queued, call end()
|
---|
1428 | if (m_finishCalled && m_pendingCallbacks->isEmpty())
|
---|
1429 | end();
|
---|
1430 | }
|
---|
1431 |
|
---|
1432 | bool XMLDocumentParser::appendFragmentSource(const String& chunk)
|
---|
1433 | {
|
---|
1434 | ASSERT(!m_context);
|
---|
1435 | ASSERT(m_parsingFragment);
|
---|
1436 |
|
---|
1437 | CString chunkAsUtf8 = chunk.utf8();
|
---|
1438 |
|
---|
1439 | // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
|
---|
1440 | if (chunkAsUtf8.length() > INT_MAX)
|
---|
1441 | return false;
|
---|
1442 |
|
---|
1443 | initializeParserContext(chunkAsUtf8);
|
---|
1444 | xmlParseContent(context());
|
---|
1445 | endDocument(); // Close any open text nodes.
|
---|
1446 |
|
---|
1447 | // FIXME: If this code is actually needed, it should probably move to finish()
|
---|
1448 | // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
|
---|
1449 | // Check if all the chunk has been processed.
|
---|
1450 | long bytesProcessed = xmlByteConsumed(context());
|
---|
1451 | if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
|
---|
1452 | // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
|
---|
1453 | // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
|
---|
1454 | ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
|
---|
1455 | return false;
|
---|
1456 | }
|
---|
1457 |
|
---|
1458 | // No error if the chunk is well formed or it is not but we have no error.
|
---|
1459 | return context()->wellFormed || !xmlCtxtGetLastError(context());
|
---|
1460 | }
|
---|
1461 |
|
---|
1462 | // --------------------------------
|
---|
1463 |
|
---|
1464 | using AttributeParseState = std::optional<HashMap<String, String>>;
|
---|
1465 |
|
---|
1466 | static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, const xmlChar* /*xmlURI*/, int /*numNamespaces*/, const xmlChar** /*namespaces*/, int numAttributes, int /*numDefaulted*/, const xmlChar** libxmlAttributes)
|
---|
1467 | {
|
---|
1468 | if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
|
---|
1469 | return;
|
---|
1470 |
|
---|
1471 | auto& state = *static_cast<AttributeParseState*>(static_cast<xmlParserCtxtPtr>(closure)->_private);
|
---|
1472 |
|
---|
1473 | state = HashMap<String, String> { };
|
---|
1474 |
|
---|
1475 | xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
|
---|
1476 | for (int i = 0; i < numAttributes; i++) {
|
---|
1477 | String attrLocalName = toString(attributes[i].localname);
|
---|
1478 | int valueLength = (int) (attributes[i].end - attributes[i].value);
|
---|
1479 | String attrValue = toString(attributes[i].value, valueLength);
|
---|
1480 | String attrPrefix = toString(attributes[i].prefix);
|
---|
1481 | String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
|
---|
1482 |
|
---|
1483 | state->set(attrQName, attrValue);
|
---|
1484 | }
|
---|
1485 | }
|
---|
1486 |
|
---|
1487 | std::optional<HashMap<String, String>> parseAttributes(const String& string)
|
---|
1488 | {
|
---|
1489 | String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
|
---|
1490 |
|
---|
1491 | AttributeParseState attributes;
|
---|
1492 |
|
---|
1493 | xmlSAXHandler sax;
|
---|
1494 | memset(&sax, 0, sizeof(sax));
|
---|
1495 | sax.startElementNs = attributesStartElementNsHandler;
|
---|
1496 | sax.initialized = XML_SAX2_MAGIC;
|
---|
1497 |
|
---|
1498 | auto parser = XMLParserContext::createStringParser(&sax, &attributes);
|
---|
1499 |
|
---|
1500 | // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
|
---|
1501 | xmlParseChunk(parser->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), parseString.length() * sizeof(UChar), 1);
|
---|
1502 |
|
---|
1503 | return attributes;
|
---|
1504 | }
|
---|
1505 |
|
---|
1506 | }
|
---|