Changeset 62410 in webkit


Ignore:
Timestamp:
Jul 2, 2010, 3:31:40 PM (15 years ago)
Author:
[email protected]
Message:

2010-07-02 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Move BOM handling out of the lexer and parser
https://bugs.webkit.org/show_bug.cgi?id=41539

Doing the BOM stripping in the lexer meant that we could
end up having to strip the BOMs from a source multiple times.
To deal with this we now require all strings provided by
a SourceProvider to already have had the BOMs stripped.
This also simplifies some of the lexer logic.

  • parser/Lexer.cpp: (JSC::Lexer::setCode): (JSC::Lexer::sourceCode):
  • parser/SourceProvider.h: (JSC::SourceProvider::SourceProvider): (JSC::UStringSourceProvider::create): (JSC::UStringSourceProvider::getRange): (JSC::UStringSourceProvider::UStringSourceProvider):
  • wtf/text/StringImpl.h: (WebCore::StringImpl::copyStringWithoutBOMs):

2010-07-02 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Move BOM handling out of the lexer and parser
https://bugs.webkit.org/show_bug.cgi?id=41539

Update WebCore to ensure that SourceProviders don't
produce strings with BOMs in them.

  • bindings/js/ScriptSourceProvider.h: (WebCore::ScriptSourceProvider::ScriptSourceProvider):
  • bindings/js/StringSourceProvider.h: (WebCore::StringSourceProvider::StringSourceProvider):
  • loader/CachedScript.cpp: (WebCore::CachedScript::CachedScript): (WebCore::CachedScript::script):
  • loader/CachedScript.h: (WebCore::CachedScript::): CachedScript now stores decoded data with the BOMs stripped, and caches the presence of BOMs across memory purges.
Location:
trunk
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r62405 r62410  
     12010-07-02  Oliver Hunt  <[email protected]>
     2
     3        Reviewed by Geoffrey Garen.
     4
     5        Move BOM handling out of the lexer and parser
     6        https://bugs.webkit.org/show_bug.cgi?id=41539
     7
     8        Doing the BOM stripping in the lexer meant that we could
     9        end up having to strip the BOMs from a source multiple times.
     10        To deal with this we now require all strings provided by
     11        a SourceProvider to already have had the BOMs stripped.
     12        This also simplifies some of the lexer logic.
     13
     14        * parser/Lexer.cpp:
     15        (JSC::Lexer::setCode):
     16        (JSC::Lexer::sourceCode):
     17        * parser/SourceProvider.h:
     18        (JSC::SourceProvider::SourceProvider):
     19        (JSC::UStringSourceProvider::create):
     20        (JSC::UStringSourceProvider::getRange):
     21        (JSC::UStringSourceProvider::UStringSourceProvider):
     22        * wtf/text/StringImpl.h:
     23        (WebCore::StringImpl::copyStringWithoutBOMs):
     24
    1252010-07-02  Renata Hodovan  <[email protected]>
    226
  • trunk/JavaScriptCore/parser/Lexer.cpp

    r62366 r62410  
    4646namespace JSC {
    4747
    48 static const UChar byteOrderMark = 0xFEFF;
    4948
    5049enum CharacterTypes {
     
    257256    m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
    258257
    259     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
    260     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
    261     if (source.provider()->hasBOMs()) {
    262         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
    263             if (UNLIKELY(*p == byteOrderMark)) {
    264                 copyCodeWithoutBOMs();
    265                 break;
    266             }
    267         }
    268     }
    269 
    270258    if (LIKELY(m_code < m_codeEnd))
    271259        m_current = *m_code;
     
    273261        m_current = -1;
    274262    ASSERT(currentOffset() == source.startOffset());
    275 }
    276 
    277 void Lexer::copyCodeWithoutBOMs()
    278 {
    279     // Note: In this case, the character offset data for debugging will be incorrect.
    280     // If it's important to correctly debug code with extraneous BOMs, then the caller
    281     // should strip the BOMs when creating the SourceProvider object and do its own
    282     // mapping of offsets within the stripped text to original text offset.
    283 
    284     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
    285     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
    286         UChar c = *p;
    287         if (c != byteOrderMark)
    288             m_codeWithoutBOMs.append(c);
    289     }
    290     ptrdiff_t startDelta = m_codeStart - m_code;
    291     m_code = m_codeWithoutBOMs.data();
    292     m_codeStart = m_code + startDelta;
    293     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
    294263}
    295264
     
    11811150SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
    11821151{
    1183     if (m_codeWithoutBOMs.isEmpty())
    1184         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
    1185 
    1186     const UChar* data = m_source->provider()->data();
    1187    
    1188     ASSERT(openBrace < closeBrace);
    1189     int i;
    1190     for (i = m_source->startOffset(); i < openBrace; ++i) {
    1191         if (data[i] == byteOrderMark) {
    1192             openBrace++;
    1193             closeBrace++;
    1194         }
    1195     }
    1196     for (; i < closeBrace; ++i) {
    1197         if (data[i] == byteOrderMark)
    1198             closeBrace++;
    1199     }
    1200 
    1201     ASSERT(openBrace < closeBrace);
    1202 
    12031152    return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
    12041153}
  • trunk/JavaScriptCore/parser/SourceProvider.h

    r44224 r62410  
    3535namespace JSC {
    3636
    37     enum SourceBOMPresence { SourceHasNoBOMs, SourceCouldHaveBOMs };
    38 
    3937    class SourceProvider : public RefCounted<SourceProvider> {
    4038    public:
    41         SourceProvider(const UString& url, SourceBOMPresence hasBOMs = SourceCouldHaveBOMs)
     39        SourceProvider(const UString& url)
    4240            : m_url(url)
    43             , m_hasBOMs(hasBOMs)
    4441        {
    4542        }
     
    5350        intptr_t asID() { return reinterpret_cast<intptr_t>(this); }
    5451
    55         SourceBOMPresence hasBOMs() const { return m_hasBOMs; }
    56 
    5752    private:
    5853        UString m_url;
    59         SourceBOMPresence m_hasBOMs;
    6054    };
    6155
    6256    class UStringSourceProvider : public SourceProvider {
    6357    public:
    64         static PassRefPtr<UStringSourceProvider> create(const UString& source, const UString& url)
     58        static PassRefPtr<UStringSourceProvider> create(const UString& source, const UString& url, bool hasBOMs = true)
    6559        {
    66             return adoptRef(new UStringSourceProvider(source, url));
     60            return adoptRef(new UStringSourceProvider(source, url, hasBOMs));
    6761        }
    6862
    69         UString getRange(int start, int end) const { return m_source.substr(start, end - start); }
     63        UString getRange(int start, int end) const
     64        {
     65            return m_source.substr(start, end - start);
     66        }
    7067        const UChar* data() const { return m_source.data(); }
    7168        int length() const { return m_source.size(); }
    7269
    7370    private:
    74         UStringSourceProvider(const UString& source, const UString& url)
     71        UStringSourceProvider(const UString& source, const UString& url, bool hasBOMs)
    7572            : SourceProvider(url)
    7673            , m_source(source)
    7774        {
     75            if (hasBOMs && m_source.size()) {
     76                bool scratch = false;
     77                m_source = UString(m_source.rep()->copyStringWithoutBOMs(false, scratch));
     78            }
    7879        }
    7980
  • trunk/JavaScriptCore/wtf/text/StringImpl.h

    r60332 r62410  
    258258    }
    259259
     260    PassRefPtr<StringImpl> copyStringWithoutBOMs(bool definitelyHasBOMs, bool& hasBOMs)
     261    {
     262        static const UChar byteOrderMark = 0xFEFF;
     263        size_t i = 0;
     264        if (!definitelyHasBOMs) {
     265            hasBOMs = false;
     266            // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
     267            // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
     268            for (; i < m_length; i++) {
     269                if (UNLIKELY(m_data[i] == byteOrderMark)) {
     270                    hasBOMs = true;
     271                    break;
     272                }
     273            }
     274            if (!hasBOMs)
     275                return this;
     276        }
     277        Vector<UChar> result;
     278        result.reserveInitialCapacity(m_length);
     279        for (; i < m_length; i++)
     280            result.append(m_data[i]);
     281        for (; i < m_length; i++) {
     282            UChar c = m_data[i];
     283            if (c != byteOrderMark)
     284                result.append(c);
     285        }
     286        return StringImpl::adopt(result);
     287    }
     288
    260289    // Returns a StringImpl suitable for use on another thread.
    261290    PassRefPtr<StringImpl> crossThreadString();
  • trunk/WebCore/ChangeLog

    r62407 r62410  
     12010-07-02  Oliver Hunt  <[email protected]>
     2
     3        Reviewed by Geoffrey Garen.
     4
     5        Move BOM handling out of the lexer and parser
     6        https://bugs.webkit.org/show_bug.cgi?id=41539
     7
     8        Update WebCore to ensure that SourceProviders don't
     9        produce strings with BOMs in them.
     10
     11        * bindings/js/ScriptSourceProvider.h:
     12        (WebCore::ScriptSourceProvider::ScriptSourceProvider):
     13        * bindings/js/StringSourceProvider.h:
     14        (WebCore::StringSourceProvider::StringSourceProvider):
     15        * loader/CachedScript.cpp:
     16        (WebCore::CachedScript::CachedScript):
     17        (WebCore::CachedScript::script):
     18        * loader/CachedScript.h:
     19        (WebCore::CachedScript::):
     20          CachedScript now stores decoded data with the BOMs stripped,
     21          and caches the presence of BOMs across memory purges.
     22
    1232010-07-02  Sam Weinig  <[email protected]>
    224
  • trunk/WebCore/bindings/js/ScriptSourceProvider.h

    r46253 r62410  
    3636    class ScriptSourceProvider : public JSC::SourceProvider {
    3737    public:
    38         ScriptSourceProvider(const JSC::UString& url, JSC::SourceBOMPresence hasBOMs = JSC::SourceCouldHaveBOMs)
    39             : SourceProvider(url, hasBOMs)
     38        ScriptSourceProvider(const JSC::UString& url)
     39            : SourceProvider(url)
    4040        {
    4141        }
  • trunk/WebCore/bindings/js/StringSourceProvider.h

    r57738 r62410  
    5050            , m_source(source)
    5151        {
     52            if (m_source.length()) {
     53                bool scratch = false;
     54                m_source = String(source.impl()->copyStringWithoutBOMs(false, scratch));
     55            }
    5256        }
    5357       
  • trunk/WebCore/loader/CachedScript.cpp

    r59576 r62410  
    3838CachedScript::CachedScript(const String& url, const String& charset)
    3939    : CachedResource(url, Script)
     40    , m_scriptHasBOMs(SourceCouldHaveBOMs)
    4041    , m_decoder(TextResourceDecoder::create("application/javascript", charset))
    4142    , m_decodedDataDeletionTimer(this, &CachedScript::decodedDataDeletionTimerFired)
     
    7980        m_script = m_decoder->decode(m_data->data(), encodedSize());
    8081        m_script += m_decoder->flush();
     82        if (m_scriptHasBOMs != SourceHasNoBOMs && m_script.length()) {
     83            bool hasBOMs = false;
     84            m_script = String(m_script.impl()->copyStringWithoutBOMs(m_scriptHasBOMs == SourceHasBOMs, hasBOMs));
     85            m_scriptHasBOMs = hasBOMs ? SourceHasBOMs : SourceHasNoBOMs;
     86        }
    8187        setDecodedSize(m_script.length() * sizeof(UChar));
    8288    }
  • trunk/WebCore/loader/CachedScript.h

    r44749 r62410  
    6060
    6161        String m_script;
     62        enum { SourceHasNoBOMs, SourceCouldHaveBOMs, SourceHasBOMs } m_scriptHasBOMs;
    6263        RefPtr<TextResourceDecoder> m_decoder;
    6364        Timer<CachedScript> m_decodedDataDeletionTimer;
Note: See TracChangeset for help on using the changeset viewer.