Ignore:
Timestamp:
Sep 24, 2010, 2:06:27 PM (15 years ago)
Author:
[email protected]
Message:

2010-09-24 Patrick Gansterer <[email protected]>

Reviewed by Gavin Barraclough.

Add WTF::StringHasher
https://bugs.webkit.org/show_bug.cgi?id=45970

StringHasher is a class for calculation stringHash out of character string.
This class will unify the different usages of the same algorithm.

  • wtf/StringHashFunctions.h: (WTF::StringHasher::StringHasher): (WTF::StringHasher::addCharacters): (WTF::StringHasher::addCharacter): (WTF::StringHasher::hash): (WTF::StringHasher::createHash): (WTF::StringHasher::defaultCoverter): (WTF::StringHasher::addCharactersToHash): (WTF::stringHash):
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/wtf/StringHashFunctions.h

    r53456 r68289  
    11/*
    22 * Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved.
     3 * Copyright (C) 2010 Patrick Gansterer <[email protected]>
    34 *
    45 * This library is free software; you can redistribute it and/or
     
    2829static const unsigned stringHashingStartValue = 0x9e3779b9U;
    2930
    30 // stringHash methods based on Paul Hsieh's SuperFastHash.
     31// Paul Hsieh's SuperFastHash
    3132// http://www.azillionmonkeys.com/qed/hash.html
    3233// char* data is interpreted as latin-encoded (zero extended to 16 bits).
     34class StringHasher {
     35public:
     36    inline StringHasher()
     37        : m_hash(stringHashingStartValue)
     38        , m_cachedCharacter(invalidCharacterValue)
     39    {
     40    }
     41
     42    inline void addCharacters(UChar a, UChar b)
     43    {
     44        ASSERT(m_cachedCharacter == invalidCharacterValue);
     45        addCharactersToHash(a, b);
     46    }
     47
     48    inline void addCharacter(UChar ch)
     49    {
     50        ASSERT(ch != invalidCharacterValue);
     51        if (m_cachedCharacter != invalidCharacterValue) {
     52            addCharactersToHash(m_cachedCharacter, ch);
     53            m_cachedCharacter = invalidCharacterValue;
     54            return;
     55        }
     56
     57        m_cachedCharacter = ch;
     58    }
     59
     60    inline unsigned hash() const
     61    {
     62        unsigned result = m_hash;
     63
     64        // Handle end case.
     65        if (m_cachedCharacter != invalidCharacterValue) {
     66            result += m_cachedCharacter;
     67            result ^= result << 11;
     68            result += result >> 17;
     69        }
     70
     71        // Force "avalanching" of final 31 bits.
     72        result ^= result << 3;
     73        result += result >> 5;
     74        result ^= result << 2;
     75        result += result >> 15;
     76        result ^= result << 10;
     77
     78        // First bit is used in UStringImpl for m_isIdentifier.
     79        result &= 0x7fffffff;
     80
     81        // This avoids ever returning a hash code of 0, since that is used to
     82        // signal "hash not computed yet", using a value that is likely to be
     83        // effectively the same as 0 when the low bits are masked.
     84        if (!result)
     85            return 0x40000000;
     86
     87        return result;
     88    }
     89
     90    template<typename T, UChar Coverter(T)> static inline unsigned createHash(const T* data, unsigned length)
     91    {
     92        StringHasher hasher;
     93        bool rem = length & 1;
     94        length >>= 1;
     95
     96        while (length--) {
     97            hasher.addCharacters(Coverter(data[0]), Coverter(data[1]));
     98            data += 2;
     99        }
     100
     101        if (rem)
     102            hasher.addCharacter(Coverter(*data));
     103
     104        return hasher.hash();
     105    }
     106
     107    template<typename T, UChar Coverter(T)> static inline unsigned createHash(const T* data)
     108    {
     109        StringHasher hasher;
     110
     111        while (true) {
     112            UChar b0 = Coverter(*data++);
     113            if (!b0)
     114                break;
     115            UChar b1 = Coverter(*data++);
     116            if (!b1) {
     117                hasher.addCharacter(b0);
     118                break;
     119            }
     120
     121            hasher.addCharacters(b0, b1);
     122        }
     123
     124        return hasher.hash();
     125    }
     126
     127    template<typename T> static inline unsigned createHash(const T* data, unsigned length)
     128    {
     129        return createHash<T, defaultCoverter>(data, length);
     130    }
     131
     132    template<typename T> static inline unsigned createHash(const T* data)
     133    {
     134        return createHash<T, defaultCoverter>(data);
     135    }
     136
     137private:
     138    static inline UChar defaultCoverter(UChar ch)
     139    {
     140        return ch;
     141    }
     142
     143    static inline UChar defaultCoverter(char ch)
     144    {
     145        return static_cast<unsigned char>(ch);
     146    }
     147
     148    inline void addCharactersToHash(UChar a, UChar b)
     149    {
     150        m_hash += a;
     151        unsigned tmp = (b << 11) ^ m_hash;
     152        m_hash = (m_hash << 16) ^ tmp;
     153        m_hash += m_hash >> 11;
     154    }
     155
     156    unsigned m_hash;
     157    UChar m_cachedCharacter;
     158
     159    static const UChar invalidCharacterValue = 0xfffe;
     160};
     161
     162
    33163
    34164inline unsigned stringHash(const UChar* data, unsigned length)
    35165{
    36     unsigned hash = WTF::stringHashingStartValue;
    37     unsigned rem = length & 1;
    38     length >>= 1;
    39 
    40     // Main loop
    41     for (; length > 0; length--) {
    42         hash += data[0];
    43         unsigned tmp = (data[1] << 11) ^ hash;
    44         hash = (hash << 16) ^ tmp;
    45         data += 2;
    46         hash += hash >> 11;
    47     }
    48 
    49     // Handle end case
    50     if (rem) {
    51         hash += data[0];
    52         hash ^= hash << 11;
    53         hash += hash >> 17;
    54     }
    55 
    56     // Force "avalanching" of final 127 bits
    57     hash ^= hash << 3;
    58     hash += hash >> 5;
    59     hash ^= hash << 2;
    60     hash += hash >> 15;
    61     hash ^= hash << 10;
    62 
    63     hash &= 0x7fffffff;
    64 
    65     // this avoids ever returning a hash code of 0, since that is used to
    66     // signal "hash not computed yet", using a value that is likely to be
    67     // effectively the same as 0 when the low bits are masked
    68     if (hash == 0)
    69         hash = 0x40000000;
    70 
    71     return hash;
     166    return StringHasher::createHash<UChar>(data, length);
    72167}
    73168
    74169inline unsigned stringHash(const char* data, unsigned length)
    75170{
    76     unsigned hash = WTF::stringHashingStartValue;
    77     unsigned rem = length & 1;
    78     length >>= 1;
    79 
    80     // Main loop
    81     for (; length > 0; length--) {
    82         hash += static_cast<unsigned char>(data[0]);
    83         unsigned tmp = (static_cast<unsigned char>(data[1]) << 11) ^ hash;
    84         hash = (hash << 16) ^ tmp;
    85         data += 2;
    86         hash += hash >> 11;
    87     }
    88 
    89     // Handle end case
    90     if (rem) {
    91         hash += static_cast<unsigned char>(data[0]);
    92         hash ^= hash << 11;
    93         hash += hash >> 17;
    94     }
    95 
    96     // Force "avalanching" of final 127 bits
    97     hash ^= hash << 3;
    98     hash += hash >> 5;
    99     hash ^= hash << 2;
    100     hash += hash >> 15;
    101     hash ^= hash << 10;
    102 
    103     hash &= 0x7fffffff;
    104 
    105     // this avoids ever returning a hash code of 0, since that is used to
    106     // signal "hash not computed yet", using a value that is likely to be
    107     // effectively the same as 0 when the low bits are masked
    108     if (hash == 0)
    109         hash = 0x40000000;
    110 
    111     return hash;
     171    return StringHasher::createHash<char>(data, length);
    112172}
    113173
    114174inline unsigned stringHash(const char* data)
    115175{
    116     unsigned hash = WTF::stringHashingStartValue;
    117 
    118     // Main loop
    119     for (;;) {
    120         unsigned char b0 = data[0];
    121         if (!b0)
    122             break;
    123         unsigned char b1 = data[1];
    124         if (!b1) {
    125             hash += b0;
    126             hash ^= hash << 11;
    127             hash += hash >> 17;
    128             break;
    129         }
    130         hash += b0;
    131         unsigned tmp = (b1 << 11) ^ hash;
    132         hash = (hash << 16) ^ tmp;
    133         data += 2;
    134         hash += hash >> 11;
    135     }
    136 
    137     // Force "avalanching" of final 127 bits.
    138     hash ^= hash << 3;
    139     hash += hash >> 5;
    140     hash ^= hash << 2;
    141     hash += hash >> 15;
    142     hash ^= hash << 10;
    143 
    144     hash &= 0x7fffffff;
    145 
    146     // This avoids ever returning a hash code of 0, since that is used to
    147     // signal "hash not computed yet", using a value that is likely to be
    148     // effectively the same as 0 when the low bits are masked.
    149     if (hash == 0)
    150         hash = 0x40000000;
    151 
    152     return hash;
     176    return StringHasher::createHash<char>(data);
    153177}
    154178
Note: See TracChangeset for help on using the changeset viewer.