From 8555f8e0d95db1ce0b14a23c73277b52083b1768 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 28 May 2026 00:30:15 +0530 Subject: [PATCH 1/3] Add a fromPureStreamMin API --- core/src/Streamly/Internal/Data/Array/Type.hs | 8 ++++++++ core/src/Streamly/Internal/Data/MutArray/Type.hs | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/core/src/Streamly/Internal/Data/Array/Type.hs b/core/src/Streamly/Internal/Data/Array/Type.hs index 99080b4348..601106a0fd 100644 --- a/core/src/Streamly/Internal/Data/Array/Type.hs +++ b/core/src/Streamly/Internal/Data/Array/Type.hs @@ -89,6 +89,7 @@ module Streamly.Internal.Data.Array.Type , fromStreamN , fromStream , fromPureStreamN + , fromPureStreamMin , fromPureStream , fromCString# , fromCString @@ -1331,6 +1332,13 @@ fromPureStreamN :: Unbox a => Int -> Stream Identity a -> Array a fromPureStreamN n x = unsafePerformIO $ fmap unsafeFreeze (MA.fromPureStreamN n x) +-- | Like 'fromPureStreamN' but @n@ is a minimum capacity hint, not a cap; the +-- buffer doubles on overflow so the full stream is always consumed. +{-# INLINE fromPureStreamMin #-} +fromPureStreamMin :: Unbox a => Int -> Stream Identity a -> Array a +fromPureStreamMin n x = + unsafePerformIO $ fmap unsafeFreeze (MA.fromPureStreamMin n x) + -- | Convert a pure stream in Identity monad to an immutable array. -- -- Same as the following but with better performance: diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 6b83e22156..3f9e2318ac 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -146,6 +146,7 @@ module Streamly.Internal.Data.MutArray.Type , fromStreamN , fromStream , fromPureStreamN + , fromPureStreamMin , fromPureStream , fromCString# , fromW16CString# @@ -2994,6 +2995,13 @@ fromPureStreamN :: (MonadIO m, Unbox a) => Int -> Stream Identity a -> m (MutArray a) fromPureStreamN n = D.fold (createOf n) . 
D.generalizeInner +-- | Like 'fromPureStreamN' but @n@ is a minimum capacity hint, not a cap; the +-- buffer doubles on overflow so the full stream is always consumed. +{-# INLINABLE fromPureStreamMin #-} +fromPureStreamMin :: (MonadIO m, Unbox a) => + Int -> Stream Identity a -> m (MutArray a) +fromPureStreamMin n = D.fold (createMinOf n) . D.generalizeInner + -- | Convert a pure stream in Identity monad to a mutable array. {-# INLINABLE fromPureStream #-} fromPureStream :: (MonadIO m, Unbox a) => Stream Identity a -> m (MutArray a) From d44643f3b909d03f2fa15841caa8e6399b68be4a Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 28 May 2026 01:51:43 +0530 Subject: [PATCH 2/3] Add rightSize and warnings for stream truncating APIs --- core/src/Streamly/Internal/Data/Array/Type.hs | 31 +++++++++++++++++++ .../Streamly/Internal/Data/MutArray/Type.hs | 21 +++++++++++++ src/Streamly/Internal/Data/SmallArray.hs | 9 ++++++ 3 files changed, 61 insertions(+) diff --git a/core/src/Streamly/Internal/Data/Array/Type.hs b/core/src/Streamly/Internal/Data/Array/Type.hs index 601106a0fd..df6dbc89c2 100644 --- a/core/src/Streamly/Internal/Data/Array/Type.hs +++ b/core/src/Streamly/Internal/Data/Array/Type.hs @@ -24,6 +24,7 @@ module Streamly.Internal.Data.Array.Type -- *** Mutable and Immutable , unsafeFreeze -- XXX unsafeFreezeMutArray , unsafeFreezeWithShrink -- XXX unsafeFreezeMutArrayShrink + , rightSize , unsafeThaw -- XXX unsafeThawArray , unsafeFromMutByteArray , unsafeCastMutByteArray @@ -399,6 +400,13 @@ unsafeFreezeWithShrink arr = unsafePerformIO $ do MA.MutArray ac as ae _ <- MA.rightSize arr return $ Array ac as ae +-- | Trim any reserved free space off the end of the array's backing buffer, +-- reallocating to a tighter capacity if the waste exceeds 25%. See +-- 'MA.rightSize' for the full policy. +{-# INLINE rightSize #-} +rightSize :: Unbox a => Array a -> Array a +rightSize = unsafeFreezeWithShrink . 
unsafeThaw + -- | Makes a mutable array using the underlying memory of the immutable array. -- -- Please make sure that there are no other references to the immutable array @@ -520,6 +528,9 @@ splice arr1 arr2 = -- allocated to size N, if the list terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- {-# INLINABLE fromListN #-} fromListN :: Unbox a => Int -> [a] -> Array a fromListN n xs = unsafePerformIO $ unsafeFreeze <$> MA.fromListN n xs @@ -535,6 +546,9 @@ RENAME_PRIME(pinnedFromListN,fromListN) -- The array is allocated to size N, if the list terminates before N elements -- then the array may hold less than N elements. -- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- -- /Pre-release/ {-# INLINABLE fromListRevN #-} fromListRevN :: Unbox a => Int -> [a] -> Array a @@ -564,6 +578,10 @@ fromListRev xs = unsafePerformIO $ unsafeFreeze <$> MA.fromListRev xs -- allocated to size N, if the stream terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: this truncates. If the stream yields more than N elements the +-- trailing elements are silently dropped. Use 'fromStream' (which grows +-- dynamically) when the exact length is unknown. +-- -- >>> fromStreamN n = Stream.fold (Array.createOf n) -- -- /Pre-release/ @@ -1192,6 +1210,11 @@ toList s = build (\c n -> toListFB c n s) -- | @createOf n@ folds a maximum of @n@ elements from the input stream to an -- 'Array'. -- +-- /WARNING/: this is a truncating fold. If the input stream has more than +-- @n@ elements, the trailing elements are silently dropped. Pass an @n@ +-- that is at least the actual stream length, or use 'createWith' / 'create' +-- (which grow on overflow) when the exact length is unknown. +-- {-# INLINE_NORMAL createOf #-} createOf :: forall m a. 
(MonadIO m, Unbox a) => Int -> Fold m a (Array a) createOf = fmap unsafeFreeze . MA.createOf @@ -1327,6 +1350,14 @@ unsafeMakePure (Fold step initial extract final) = (\s -> return $! unsafeInlineIO $ extract s) (\s -> return $! unsafeInlineIO $ final s) +-- | Convert a pure Identity stream to an array, allocating exactly @n@ +-- elements. +-- +-- /WARNING/: this truncates. If the stream has more than @n@ elements the +-- trailing elements are silently dropped. Use 'fromPureStreamMin' (which +-- treats @n@ as a minimum and grows on overflow) or 'fromPureStream' when +-- the exact length is not known up front. +-- {-# INLINE fromPureStreamN #-} fromPureStreamN :: Unbox a => Int -> Stream Identity a -> Array a fromPureStreamN n x = diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 3f9e2318ac..fe2e8b18a9 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -2728,6 +2728,11 @@ writeNAs ps = createWithOf (newAs ps) -- -- The array capacity is guranteed to be at least @n@. -- +-- /WARNING/: this is a truncating fold. If the input stream has more than @n@ +-- elements, the trailing elements are silently dropped. Pass an @n@ that is +-- at least the actual stream length, or use 'createMinOf' or 'create' (which +-- grow on overflow) if the exact length is unknown. +-- -- >>> createOf = MutArray.createWithOf MutArray.emptyOf -- >>> createOf n = Fold.take n (MutArray.unsafeCreateOf n) -- >>> createOf n = MutArray.appendMax n MutArray.empty @@ -2789,6 +2794,9 @@ writeRevNWith alloc n = FL.take n (writeRevNWithUnsafe alloc n) -- | Like 'createOf' but writes the array in reverse order. -- +-- /WARNING/: same truncation behaviour as 'createOf'; passing an @n@ smaller +-- than the stream length silently drops trailing input. +-- -- /Pre-release/ {-# INLINE_NORMAL revCreateOf #-} revCreateOf :: forall m a. 
(MonadIO m, Unbox a) => Int -> Fold m a (MutArray a) @@ -2954,6 +2962,10 @@ fromStreamDNAs ps limit str = do -- | Create a MutArray of given size from a stream. -- +-- /WARNING/: this truncates. If the stream yields more than @n@ elements the +-- trailing elements are silently dropped. Use a value of @n@ that is at +-- least the actual stream length, or use a non-truncating builder. +-- -- >>> fromStreamN n = Stream.fold (MutArray.createOf n) -- {-# INLINE_NORMAL fromStreamN #-} @@ -2972,6 +2984,9 @@ fromStreamDN = fromStreamN -- allocated to size N, if the list terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- {-# INLINABLE fromListN #-} fromListN :: (MonadIO m, Unbox a) => Int -> [a] -> m (MutArray a) fromListN n xs = fromStreamN n $ D.fromList xs @@ -2990,6 +3005,12 @@ fromListRevN :: (MonadIO m, Unbox a) => Int -> [a] -> m (MutArray a) fromListRevN n xs = D.fold (revCreateOf n) $ D.fromList xs -- | Convert a pure stream in Identity monad to a mutable array. +-- +-- /WARNING/: this truncates. If the stream has more than @n@ elements the +-- trailing elements are silently dropped. Use 'fromPureStreamMin' (which +-- treats @n@ as a minimum and grows on overflow) or 'fromPureStream' when +-- the exact length is not known up front. +-- {-# INLINABLE fromPureStreamN #-} fromPureStreamN :: (MonadIO m, Unbox a) => Int -> Stream Identity a -> m (MutArray a) diff --git a/src/Streamly/Internal/Data/SmallArray.hs b/src/Streamly/Internal/Data/SmallArray.hs index 8452f9bde7..549c984578 100644 --- a/src/Streamly/Internal/Data/SmallArray.hs +++ b/src/Streamly/Internal/Data/SmallArray.hs @@ -102,6 +102,9 @@ foldr f z arr = runIdentity $ D.foldr f z $ toStreamD arr -- | @createOf n@ folds a maximum of @n@ elements from the input stream to an -- 'SmallArray'. -- +-- /WARNING/: this truncates. 
If the input stream has more than @n@ elements +-- the trailing elements are silently dropped. +-- -- Since we are folding to a 'SmallArray' @n@ should be <= 128, for larger number -- of elements use an 'Array' from either "Streamly.Data.Array.Generic" or "Streamly.Data.Array.Foreign". {-# INLINE_NORMAL createOf #-} @@ -144,6 +147,9 @@ fromStreamDN limit str = do -- array may hold less than @n@ elements if the length of the list <= -- @n@. -- +-- /WARNING/: if the list has more than @n@ elements the trailing elements +-- are silently dropped. +-- -- It is recommended to use a value of @n@ <= 128. For larger sized -- arrays, use an 'Array' from "Streamly.Data.Array" or -- "Streamly.Data.Array.Foreign" @@ -159,6 +165,9 @@ instance NFData a => NFData (SmallArray a) where -- array is allocated to size @n@, if the stream terminates before @n@ -- elements then the array may hold less than @n@ elements. -- +-- /WARNING/: this truncates. If the stream yields more than @n@ elements +-- the trailing elements are silently dropped. +-- -- For optimal performance use this with @n@ <= 128. 
{-# INLINE fromStreamN #-} fromStreamN :: MonadIO m => Int -> Stream m a -> m (SmallArray a) From bf7c02279d1c9223f7fda0b98c9272984c293066 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 28 May 2026 01:52:37 +0530 Subject: [PATCH 3/3] Fix incorrect allocation bug in Utf8 pack --- src/Streamly/Internal/Unicode/Utf8.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Streamly/Internal/Unicode/Utf8.hs b/src/Streamly/Internal/Unicode/Utf8.hs index 1f9877a123..718ebb9171 100644 --- a/src/Streamly/Internal/Unicode/Utf8.hs +++ b/src/Streamly/Internal/Unicode/Utf8.hs @@ -34,8 +34,9 @@ import System.IO.Unsafe (unsafePerformIO) import qualified Streamly.Data.Fold as Fold import qualified Streamly.Data.Stream as Stream import qualified Streamly.Internal.Data.Array as Array - ( fromStreamN + ( fromPureStreamN , read + , rightSize ) import qualified Streamly.Internal.Unicode.Stream as Unicode @@ -58,9 +59,13 @@ toArray (Utf8 arr) = arr {-# INLINEABLE pack #-} pack :: String -> Utf8 pack s = + -- UTF-8 emits up to 4 bytes per char; allocate the worst case so the + -- encoded stream cannot overflow, then rightSize trims the slack. Utf8 - $ unsafePerformIO - $ Array.fromStreamN len $ Unicode.encodeUtf8' $ Stream.fromList s + $ Array.rightSize + $ Array.fromPureStreamN (4 * len) + $ Unicode.encodeUtf8' + $ Stream.fromList s where