// Copyright 2023 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package swiss import ( "github.com/dolthub/maphash" ) const ( maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize) ) // Map is an open-addressing hash map // based on Abseil's flat_hash_map. type Map[K comparable, V any] struct { ctrl []metadata groups []group[K, V] hash maphash.Hasher[K] resident uint32 dead uint32 limit uint32 } // metadata is the h2 metadata array for a group. // find operations first probe the controls bytes // to filter candidates before matching keys type metadata [groupSize]int8 // group is a group of 16 key-value pairs type group[K comparable, V any] struct { keys [groupSize]K values [groupSize]V } const ( h1Mask uint64 = 0xffff_ffff_ffff_ff80 h2Mask uint64 = 0x0000_0000_0000_007f empty int8 = -128 // 0b1000_0000 tombstone int8 = -2 // 0b1111_1110 ) // h1 is a 57 bit hash prefix type h1 uint64 // h2 is a 7 bit hash suffix type h2 int8 // NewMap constructs a Map. func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { groups := numGroups(sz) m = &Map[K, V]{ ctrl: make([]metadata, groups), groups: make([]group[K, V], groups), hash: maphash.NewHasher[K](), limit: groups * maxAvgGroupLoad, } for i := range m.ctrl { m.ctrl[i] = newEmptyMetadata() } return } // Has returns true if |key| is present in |m|. func (m *Map[K, V]) Has(key K) (ok bool) { hi, lo := splitHash(m.hash.Hash(key)) g := probeStart(hi, len(m.groups)) for { // inlined find loop matches := metaMatchH2(&m.ctrl[g], lo) for matches != 0 { s := nextMatch(&matches) if key == m.groups[g].keys[s] { ok = true return } } // |key| is not in group |g|, // stop probing if we see an empty slot matches = metaMatchEmpty(&m.ctrl[g]) if matches != 0 { ok = false return } g += 1 // linear probing if g >= uint32(len(m.groups)) { g = 0 } } } // Get returns the |value| mapped by |key| if one exists. func (m *Map[K, V]) Get(key K) (value V, ok bool) { hi, lo := splitHash(m.hash.Hash(key)) g := probeStart(hi, len(m.groups)) for { // inlined find loop matches := metaMatchH2(&m.ctrl[g], lo) for matches != 0 { s := nextMatch(&matches) if key == m.groups[g].keys[s] { value, ok = m.groups[g].values[s], true return } } // |key| is not in group |g|, // stop probing if we see an empty slot matches = metaMatchEmpty(&m.ctrl[g]) if matches != 0 { ok = false return } g += 1 // linear probing if g >= uint32(len(m.groups)) { g = 0 } } } // Put attempts to insert |key| and |value| func (m *Map[K, V]) Put(key K, value V) { if m.resident >= m.limit { m.rehash(m.nextSize()) } hi, lo := splitHash(m.hash.Hash(key)) g := probeStart(hi, len(m.groups)) for { // inlined find loop matches := metaMatchH2(&m.ctrl[g], lo) for matches != 0 { s := nextMatch(&matches) if key == m.groups[g].keys[s] { // update m.groups[g].keys[s] = key m.groups[g].values[s] = value return } } // |key| is not in group |g|, // stop probing if we see an empty slot matches = metaMatchEmpty(&m.ctrl[g]) if matches != 0 { // insert s := nextMatch(&matches) m.groups[g].keys[s] = key m.groups[g].values[s] = value m.ctrl[g][s] = int8(lo) m.resident++ return } g += 1 // linear probing if g >= uint32(len(m.groups)) { g = 0 } } } // Delete attempts to remove |key|, returns true successful. func (m *Map[K, V]) Delete(key K) (ok bool) { hi, lo := splitHash(m.hash.Hash(key)) g := probeStart(hi, len(m.groups)) for { matches := metaMatchH2(&m.ctrl[g], lo) for matches != 0 { s := nextMatch(&matches) if key == m.groups[g].keys[s] { ok = true // optimization: if |m.ctrl[g]| contains any empty // metadata bytes, we can physically delete |key| // rather than placing a tombstone. // The observation is that any probes into group |g| // would already be terminated by the existing empty // slot, and therefore reclaiming slot |s| will not // cause premature termination of probes into |g|. if metaMatchEmpty(&m.ctrl[g]) != 0 { m.ctrl[g][s] = empty m.resident-- } else { m.ctrl[g][s] = tombstone m.dead++ } var k K var v V m.groups[g].keys[s] = k m.groups[g].values[s] = v return } } // |key| is not in group |g|, // stop probing if we see an empty slot matches = metaMatchEmpty(&m.ctrl[g]) if matches != 0 { // |key| absent ok = false return } g += 1 // linear probing if g >= uint32(len(m.groups)) { g = 0 } } } // Iter iterates the elements of the Map, passing them to the callback. // It guarantees that any key in the Map will be visited only once, and // for un-mutated Maps, every key will be visited once. If the Map is // Mutated during iteration, mutations will be reflected on return from // Iter, but the set of keys visited by Iter is non-deterministic. func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { // take a consistent view of the table in case // we rehash during iteration ctrl, groups := m.ctrl, m.groups // pick a random starting group g := randIntN(len(groups)) for n := 0; n < len(groups); n++ { for s, c := range ctrl[g] { if c == empty || c == tombstone { continue } k, v := groups[g].keys[s], groups[g].values[s] if stop := cb(k, v); stop { return } } g++ if g >= uint32(len(groups)) { g = 0 } } } // Clear removes all elements from the Map. func (m *Map[K, V]) Clear() { for i, c := range m.ctrl { for j := range c { m.ctrl[i][j] = empty } } var k K var v V for i := range m.groups { g := &m.groups[i] for i := range g.keys { g.keys[i] = k g.values[i] = v } } m.resident, m.dead = 0, 0 } // Count returns the number of elements in the Map. func (m *Map[K, V]) Count() int { return int(m.resident - m.dead) } // Capacity returns the number of additional elements // the can be added to the Map before resizing. func (m *Map[K, V]) Capacity() int { return int(m.limit - m.resident) } // find returns the location of |key| if present, or its insertion location if absent. // for performance, find is manually inlined into public methods. func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { g = probeStart(hi, len(m.groups)) for { matches := metaMatchH2(&m.ctrl[g], lo) for matches != 0 { s = nextMatch(&matches) if key == m.groups[g].keys[s] { return g, s, true } } // |key| is not in group |g|, // stop probing if we see an empty slot matches = metaMatchEmpty(&m.ctrl[g]) if matches != 0 { s = nextMatch(&matches) return g, s, false } g += 1 // linear probing if g >= uint32(len(m.groups)) { g = 0 } } } func (m *Map[K, V]) nextSize() (n uint32) { n = uint32(len(m.groups)) * 2 if m.dead >= (m.resident / 2) { n = uint32(len(m.groups)) } return } func (m *Map[K, V]) rehash(n uint32) { groups, ctrl := m.groups, m.ctrl m.groups = make([]group[K, V], n) m.ctrl = make([]metadata, n) for i := range m.ctrl { m.ctrl[i] = newEmptyMetadata() } m.hash = maphash.NewSeed(m.hash) m.limit = n * maxAvgGroupLoad m.resident, m.dead = 0, 0 for g := range ctrl { for s := range ctrl[g] { c := ctrl[g][s] if c == empty || c == tombstone { continue } m.Put(groups[g].keys[s], groups[g].values[s]) } } } func (m *Map[K, V]) loadFactor() float32 { slots := float32(len(m.groups) * groupSize) return float32(m.resident-m.dead) / slots } // numGroups returns the minimum number of groups needed to store |n| elems. func numGroups(n uint32) (groups uint32) { groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad if groups == 0 { groups = 1 } return } func newEmptyMetadata() (meta metadata) { for i := range meta { meta[i] = empty } return } func splitHash(h uint64) (h1, h2) { return h1((h & h1Mask) >> 7), h2(h & h2Mask) } func probeStart(hi h1, groups int) uint32 { return fastModN(uint32(hi), uint32(groups)) } // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ func fastModN(x, n uint32) uint32 { return uint32((uint64(x) * uint64(n)) >> 32) } // randIntN returns a random number in the interval [0, n). func randIntN(n int) uint32 { return fastModN(fastrand(), uint32(n)) }