From 1644a5c79789adb875d35db8aaf0cb86277d81c8 Mon Sep 17 00:00:00 2001 From: Patrick Steele Date: Sun, 5 Jan 2025 13:04:38 -0500 Subject: [PATCH 1/2] feat: implement open-addressed hash tables --- haskell-algorithms.cabal | 5 + src/Data/HashTable/Generic.hs | 77 ++++++++ src/Data/HashTable/Open.hs | 186 ++++++++++++++++++ test/Algorithms/ShortestPaths/DijkstraSpec.hs | 6 - test/Algorithms/TestUtil.hs | 6 + test/Data/HashTable/OpenSpec.hs | 23 +++ 6 files changed, 297 insertions(+), 6 deletions(-) create mode 100644 src/Data/HashTable/Generic.hs create mode 100644 src/Data/HashTable/Open.hs create mode 100644 test/Data/HashTable/OpenSpec.hs diff --git a/haskell-algorithms.cabal b/haskell-algorithms.cabal index c6e6b08..2469a49 100644 --- a/haskell-algorithms.cabal +++ b/haskell-algorithms.cabal @@ -58,6 +58,8 @@ library , Algorithms.Sorting.Sort3 , Algorithms.Sorting.Utility , Algorithms.Utility + , Data.HashTable.Generic + , Data.HashTable.Open , Data.Heap , Data.Heap.Fixed , Data.Heap.Fixed.Intrusive @@ -70,6 +72,7 @@ library , Data.Vector.Growable.Unboxed build-depends: base >=4.16.4.0 , containers + , hashable , mtl , random , optics @@ -119,10 +122,12 @@ test-suite tests , Algorithms.Sorting.TestUtil , Algorithms.TestUtil , Algorithms.UtilitySpec + , Data.HashTable.OpenSpec , Data.Heap.HeapSpec , Data.Vector.GrowableSpec build-depends: base >=4.16.4.0 , containers + , hashable , haskell-algorithms , hspec , HUnit diff --git a/src/Data/HashTable/Generic.hs b/src/Data/HashTable/Generic.hs new file mode 100644 index 0000000..025d1f5 --- /dev/null +++ b/src/Data/HashTable/Generic.hs @@ -0,0 +1,77 @@ +module Data.HashTable.Generic where + +import qualified Control.Monad as C +import Control.Monad.Primitive +import Control.Monad.ST +import Prelude hiding (mapM_) + +type Hash k = k -> Int + +type Probe k = Hash k -> k -> Int -> [Int] + +class HashTable f k v where + -- | Query the table for the value associated with a key. + basicLookup :: f s k v -> k -> ST s (Maybe v) + + -- | Associate a value with a key. + basicInsert :: f s k v -> k -> v -> ST s () + + -- | Remove a key and its associated value. + basicDelete :: f s k v -> k -> ST s () + + -- | The number of elements stored in the table. + basicLength :: f s k v -> ST s Int + + -- | The contents of the table. + basicToList :: f s k v -> ST s [(k, v)] + + -- | The keys of the table. + basicKeys :: f s k v -> ST s [k] + basicKeys = fmap (fmap fst) . basicToList + + -- | The values of the table. + basicValues :: f s k v -> ST s [v] + basicValues = fmap (fmap snd) . basicToList + +linear :: Hash k -> k -> Int -> [Int] +linear h k m = fmap f [0 .. m - 1] + where + f i = (h k + i) `mod` m + +quadratic :: Int -> Int -> Hash k -> k -> Int -> [Int] +quadratic c1 c2 h k m = fmap f [0 .. m - 1] + where + f i = (h k + i * c1 + i * i * c2) `mod` m + +lookup :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> k -> m (Maybe v) +lookup t k = stToPrim (basicLookup t k) + +insert :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> k -> v -> m () +insert t k v = stToPrim (basicInsert t k v) + +delete :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> k -> m () +delete t k = stToPrim (basicDelete t k) + +length :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> m Int +length t = stToPrim (basicLength t) + +toList :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> m [(k, v)] +toList t = stToPrim (basicToList t) + +keys :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> m [k] +keys t = stToPrim (basicKeys t) + +values :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> m [v] +values t = stToPrim (basicValues t) + +mapM_ :: (PrimMonad m, HashTable f k v) => (v -> m ()) -> f (PrimState m) k v -> m () +mapM_ f t = values t >>= C.mapM_ f + +kmapM_ :: (PrimMonad m, HashTable f k v) => (k -> v -> m ()) -> f (PrimState m) k v -> m () +kmapM_ f t = toList t >>= C.mapM_ (uncurry f) + +forM_ :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> (v -> m ()) -> m () +forM_ = flip mapM_ + +kforM_ :: (PrimMonad m, HashTable f k v) => f (PrimState m) k v -> (k -> v -> m ()) -> m () +kforM_ = flip kmapM_ diff --git a/src/Data/HashTable/Open.hs b/src/Data/HashTable/Open.hs new file mode 100644 index 0000000..00cc43e --- /dev/null +++ b/src/Data/HashTable/Open.hs @@ -0,0 +1,186 @@ +module Data.HashTable.Open + ( -- * Open-addressed hash tables + HashTable, + + -- ** Probe functions + G.linear, + G.quadratic, + + -- * Construction and destruction + new, + toList, + keys, + values, + + -- * Insertion, deletion, querying + insert, + delete, + lookup, + length, + + -- * Folds + mapM_, + kmapM_, + forM_, + kforM_, + ) +where + +import Control.Monad (unless) +import Control.Monad.Primitive +import Control.Monad.ST +import Data.Functor.Foldable +import qualified Data.HashTable.Generic as G +import Data.STRef +import qualified Data.Vector.Mutable as M +import Prelude hiding (length, lookup, mapM_) + +data Entry k v = Empty | Tombstone | Value (Int, k, v) + deriving (Eq) + +type Hash k = k -> Int + +type Probe k = Hash k -> k -> Int -> [Int] + +data HashTable s k v = HashTable + { hash :: Hash k, + probe :: Probe k, + size :: STRef s Int, + vec :: STRef s (M.MVector s (Entry k v)) + } + +instance (Eq k) => G.HashTable HashTable k v where + basicLookup = stLookup + basicInsert = stInsert + basicDelete = stDelete + basicToList = stToList + basicLength = stLength + +stNew :: Hash k -> Probe k -> Int -> ST s (HashTable s k v) +stNew hash probe size = + HashTable hash probe <$> newSTRef 0 <*> (M.replicate size Empty >>= newSTRef) + +stLookup :: (Eq k) => HashTable s k v -> k -> ST s (Maybe v) +stLookup (HashTable hashFun probe _ rv) k = + let hk = hashFun k + + alg _ Nil = pure Nothing + alg mv (Cons ix rest) = do + entry <- M.read mv ix + case entry of + Empty -> pure Nothing + Tombstone -> rest + Value (hk', k', v) -> + if hk == hk' && k == k' + then pure (Just v) + else rest + in do + mv <- readSTRef rv + cata (alg mv) (probe hashFun k (M.length mv)) + +stGrow :: (Eq k) => HashTable s k v -> ST s () +stGrow table@(HashTable _ _ _ rv) = do + mv <- readSTRef rv + mv' <- M.replicate (max 1 (M.length mv * 2)) Empty + writeSTRef rv mv' + + M.forM_ mv $ \case + Tombstone -> pure () + Empty -> pure () + Value (_, k, v) -> stInsert table k v + +stInsert :: (Eq k) => HashTable s k v -> k -> v -> ST s () +stInsert table@(HashTable hashFun probe sRef rv) k v = + let hk = hashFun k + + doInsert mv ix = M.write mv ix (Value (hk, k, v)) + + alg _ Nil = pure False + alg mv (Cons ix rest) = do + entry <- M.read mv ix + case entry of + Value (hk', k', _) -> + if hk == hk' && k == k' + then doInsert mv ix >> pure True -- Update an element + else rest + _ -> do + -- Write the entry + doInsert mv ix + + -- Mark the size increase + modifySTRef' sRef succ + + -- Note success + pure True + in do + currentSize <- readSTRef sRef + mv <- readSTRef rv + + let growThenInsert = stGrow table >> stInsert table k v + + if currentSize == M.length mv + then growThenInsert + else cata (alg mv) (probe hashFun k (M.length mv)) >>= (`unless` growThenInsert) + +stDelete :: (Eq k) => HashTable s k v -> k -> ST s () +stDelete (HashTable hashFun probe _ rv) k = + let hk = hashFun k + + alg _ Nil = pure () + alg mv (Cons ix rest) = do + entry <- M.read mv ix + case entry of + Empty -> pure () + Tombstone -> rest + Value (hk', k', _) -> + if hk == hk' && k == k' + then M.write mv ix Tombstone + else rest + in do + mv <- readSTRef rv + cata (alg mv) (probe hashFun k (M.length mv)) + +stToList :: HashTable s k v -> ST s [(k, v)] +stToList (HashTable _ _ _ rv) = + let f (Value (_, k, v)) rest = (k, v) : rest + f _ rest = rest + in readSTRef rv >>= M.foldr f [] + +stLength :: HashTable s k v -> ST s Int +stLength (HashTable _ _ sRef _) = readSTRef sRef + +new :: (PrimMonad m, Eq k) => Hash k -> Probe k -> Int -> m (HashTable (PrimState m) k v) +new h p s = stToPrim (stNew h p s) + +lookup :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> k -> m (Maybe v) +lookup = G.lookup + +insert :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> k -> v -> m () +insert = G.insert + +delete :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> k -> m () +delete = G.delete + +values :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> m [v] +values = G.values + +keys :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> m [k] +keys = G.keys + +toList :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> m [(k, v)] +toList = G.toList + +length :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> m Int +length = G.length + +mapM_ :: (PrimMonad m, Eq k) => (v -> m ()) -> HashTable (PrimState m) k v -> m () +mapM_ = G.mapM_ + +kmapM_ :: (PrimMonad m, Eq k) => (k -> v -> m ()) -> HashTable (PrimState m) k v -> m () +kmapM_ = G.kmapM_ + +forM_ :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> (v -> m ()) -> m () +forM_ = G.forM_ + +kforM_ :: (PrimMonad m, Eq k) => HashTable (PrimState m) k v -> (k -> v -> m ()) -> m () +kforM_ = G.kforM_ diff --git a/test/Algorithms/ShortestPaths/DijkstraSpec.hs b/test/Algorithms/ShortestPaths/DijkstraSpec.hs index 1617be2..35e0d02 100644 --- a/test/Algorithms/ShortestPaths/DijkstraSpec.hs +++ b/test/Algorithms/ShortestPaths/DijkstraSpec.hs @@ -26,12 +26,6 @@ manhattanGridNeighbors width (x, y) = filter inBounds [(x + 1, y), (x - 1, y), ( where inBounds = uncurry (&&) . dup bimap (between 0 width) -(==?) :: (Monad m, Show a, Eq a) => a -> a -> PropertyM m () -x ==? y = - if x == y - then assertWith True (show x <> " == " <> show y) - else assertWith False (show x <> " /= " <> show y) - dijkstrasManhattan :: Positive Int -> (Int, Int) -> (Int, Int) -> Property dijkstrasManhattan (Positive width) source' sink' = let mkValid = bimap f f diff --git a/test/Algorithms/TestUtil.hs b/test/Algorithms/TestUtil.hs index f227271..d596957 100644 --- a/test/Algorithms/TestUtil.hs +++ b/test/Algorithms/TestUtil.hs @@ -26,3 +26,9 @@ newtype D = D {unD :: Integer} failure :: (Monad m) => String -> PropertyM m () failure = assertWith False + +(==?) :: (Monad m, Show a, Eq a) => a -> a -> PropertyM m () +x ==? y = + if x == y + then assertWith True (show x <> " == " <> show y) + else assertWith False (show x <> " /= " <> show y) diff --git a/test/Data/HashTable/OpenSpec.hs b/test/Data/HashTable/OpenSpec.hs new file mode 100644 index 0000000..83845a3 --- /dev/null +++ b/test/Data/HashTable/OpenSpec.hs @@ -0,0 +1,23 @@ +module Data.HashTable.OpenSpec where + +import Algorithms.TestUtil +import Control.Monad +import qualified Data.HashTable.Open as H +import Data.Hashable +import Test.Hspec +import Test.Hspec.QuickCheck +import Test.QuickCheck +import Test.QuickCheck.Monadic + +spec :: Spec +spec = do + describe "Insertion" $ do + prop "Just v == insert k v >> lookup k" insertLookupProp + +insertLookupProp :: [(Int, Int)] -> Property +insertLookupProp kvs = monadicIO $ do + t <- run (H.new hash H.linear 0) + forM_ kvs $ \(k, v) -> do + run $ H.insert t k v + mv <- run (H.lookup t k) + mv ==? Just v From 7fd7d378bffcb954b8449d9fbcd8286ae0dc4f1e Mon Sep 17 00:00:00 2001 From: Patrick Steele Date: Tue, 7 Jan 2025 21:09:47 -0500 Subject: [PATCH 2/2] test(hash tables): with a bad hash function --- test/Data/HashTable/OpenSpec.hs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/Data/HashTable/OpenSpec.hs b/test/Data/HashTable/OpenSpec.hs index 83845a3..f21344a 100644 --- a/test/Data/HashTable/OpenSpec.hs +++ b/test/Data/HashTable/OpenSpec.hs @@ -13,6 +13,8 @@ spec :: Spec spec = do describe "Insertion" $ do prop "Just v == insert k v >> lookup k" insertLookupProp + describe "Insertion (with a bad hash)" $ do + prop "Just v == insert k v >> lookup k" insertLookupBadHashProp insertLookupProp :: [(Int, Int)] -> Property insertLookupProp kvs = monadicIO $ do @@ -21,3 +23,11 @@ insertLookupProp kvs = monadicIO $ do run $ H.insert t k v mv <- run (H.lookup t k) mv ==? Just v + +insertLookupBadHashProp :: [(Int, Int)] -> Property +insertLookupBadHashProp kvs = monadicIO $ do + t <- run (H.new (const 0) H.linear 0) + forM_ kvs $ \(k, v) -> do + run $ H.insert t k v + mv <- run (H.lookup t k) + mv ==? Just v