diff --git a/core/src/Streamly/Internal/Data/Array/Type.hs b/core/src/Streamly/Internal/Data/Array/Type.hs index 99080b4348..df6dbc89c2 100644 --- a/core/src/Streamly/Internal/Data/Array/Type.hs +++ b/core/src/Streamly/Internal/Data/Array/Type.hs @@ -24,6 +24,7 @@ module Streamly.Internal.Data.Array.Type -- *** Mutable and Immutable , unsafeFreeze -- XXX unsafeFreezeMutArray , unsafeFreezeWithShrink -- XXX unsafeFreezeMutArrayShrink + , rightSize , unsafeThaw -- XXX unsafeThawArray , unsafeFromMutByteArray , unsafeCastMutByteArray @@ -89,6 +90,7 @@ module Streamly.Internal.Data.Array.Type , fromStreamN , fromStream , fromPureStreamN + , fromPureStreamMin , fromPureStream , fromCString# , fromCString @@ -398,6 +400,13 @@ unsafeFreezeWithShrink arr = unsafePerformIO $ do MA.MutArray ac as ae _ <- MA.rightSize arr return $ Array ac as ae +-- | Trim any reserved free space off the end of the array's backing buffer, +-- reallocating to a tighter capacity if the waste exceeds 25%. See +-- 'MA.rightSize' for the full policy. +{-# INLINE rightSize #-} +rightSize :: Unbox a => Array a -> Array a +rightSize = unsafeFreezeWithShrink . unsafeThaw + -- | Makes a mutable array using the underlying memory of the immutable array. -- -- Please make sure that there are no other references to the immutable array @@ -519,6 +528,9 @@ splice arr1 arr2 = -- allocated to size N, if the list terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- {-# INLINABLE fromListN #-} fromListN :: Unbox a => Int -> [a] -> Array a fromListN n xs = unsafePerformIO $ unsafeFreeze <$> MA.fromListN n xs @@ -534,6 +546,9 @@ RENAME_PRIME(pinnedFromListN,fromListN) -- The array is allocated to size N, if the list terminates before N elements -- then the array may hold less than N elements. 
-- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- -- /Pre-release/ {-# INLINABLE fromListRevN #-} fromListRevN :: Unbox a => Int -> [a] -> Array a @@ -563,6 +578,10 @@ fromListRev xs = unsafePerformIO $ unsafeFreeze <$> MA.fromListRev xs -- allocated to size N, if the stream terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: this truncates. If the stream yields more than N elements the +-- trailing elements are silently dropped. Use 'fromStream' (which grows +-- dynamically) when the exact length is unknown. +-- -- >>> fromStreamN n = Stream.fold (Array.createOf n) -- -- /Pre-release/ @@ -1191,6 +1210,11 @@ toList s = build (\c n -> toListFB c n s) -- | @createOf n@ folds a maximum of @n@ elements from the input stream to an -- 'Array'. -- +-- /WARNING/: this is a truncating fold. If the input stream has more than +-- @n@ elements, the trailing elements are silently dropped. Pass an @n@ +-- that is at least the actual stream length, or use 'createWith' / 'create' +-- (which grow on overflow) when the exact length is unknown. +-- {-# INLINE_NORMAL createOf #-} createOf :: forall m a. (MonadIO m, Unbox a) => Int -> Fold m a (Array a) createOf = fmap unsafeFreeze . MA.createOf @@ -1326,11 +1350,26 @@ unsafeMakePure (Fold step initial extract final) = (\s -> return $! unsafeInlineIO $ extract s) (\s -> return $! unsafeInlineIO $ final s) +-- | Convert a pure Identity stream to an array, allocating exactly @n@ +-- elements. +-- +-- /WARNING/: this truncates. If the stream has more than @n@ elements the +-- trailing elements are silently dropped. Use 'fromPureStreamMin' (which +-- treats @n@ as a minimum and grows on overflow) or 'fromPureStream' when +-- the exact length is not known up front. 
+-- {-# INLINE fromPureStreamN #-} fromPureStreamN :: Unbox a => Int -> Stream Identity a -> Array a fromPureStreamN n x = unsafePerformIO $ fmap unsafeFreeze (MA.fromPureStreamN n x) +-- | Like 'fromPureStreamN' but @n@ is a minimum capacity hint, not a cap; the +-- buffer doubles on overflow so the full stream is always consumed. +{-# INLINE fromPureStreamMin #-} +fromPureStreamMin :: Unbox a => Int -> Stream Identity a -> Array a +fromPureStreamMin n x = + unsafePerformIO $ fmap unsafeFreeze (MA.fromPureStreamMin n x) + -- | Convert a pure stream in Identity monad to an immutable array. -- -- Same as the following but with better performance: diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 6b83e22156..fe2e8b18a9 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -146,6 +146,7 @@ module Streamly.Internal.Data.MutArray.Type , fromStreamN , fromStream , fromPureStreamN + , fromPureStreamMin , fromPureStream , fromCString# , fromW16CString# @@ -2727,6 +2728,11 @@ writeNAs ps = createWithOf (newAs ps) -- -- The array capacity is guranteed to be at least @n@. -- +-- /WARNING/: this is a truncating fold. If the input stream has more than @n@ +-- elements, the trailing elements are silently dropped. Pass an @n@ that is +-- at least the actual stream length, or use 'createMinOf' or 'create' (which +-- grow on overflow) if the exact length is unknown. +-- -- >>> createOf = MutArray.createWithOf MutArray.emptyOf -- >>> createOf n = Fold.take n (MutArray.unsafeCreateOf n) -- >>> createOf n = MutArray.appendMax n MutArray.empty @@ -2788,6 +2794,9 @@ writeRevNWith alloc n = FL.take n (writeRevNWithUnsafe alloc n) -- | Like 'createOf' but writes the array in reverse order. -- +-- /WARNING/: same truncation behaviour as 'createOf'; passing an @n@ smaller +-- than the stream length silently drops trailing input. 
+-- -- /Pre-release/ {-# INLINE_NORMAL revCreateOf #-} revCreateOf :: forall m a. (MonadIO m, Unbox a) => Int -> Fold m a (MutArray a) @@ -2953,6 +2962,10 @@ fromStreamDNAs ps limit str = do -- | Create a MutArray of given size from a stream. -- +-- /WARNING/: this truncates. If the stream yields more than @n@ elements the +-- trailing elements are silently dropped. Use a value of @n@ that is at +-- least the actual stream length, or use a non-truncating builder. +-- -- >>> fromStreamN n = Stream.fold (MutArray.createOf n) -- {-# INLINE_NORMAL fromStreamN #-} @@ -2971,6 +2984,9 @@ fromStreamDN = fromStreamN -- allocated to size N, if the list terminates before N elements then the -- array may hold less than N elements. -- +-- /WARNING/: if the list has more than N elements the trailing elements are +-- silently dropped. +-- {-# INLINABLE fromListN #-} fromListN :: (MonadIO m, Unbox a) => Int -> [a] -> m (MutArray a) fromListN n xs = fromStreamN n $ D.fromList xs @@ -2989,11 +3005,24 @@ fromListRevN :: (MonadIO m, Unbox a) => Int -> [a] -> m (MutArray a) fromListRevN n xs = D.fold (revCreateOf n) $ D.fromList xs -- | Convert a pure stream in Identity monad to a mutable array. +-- +-- /WARNING/: this truncates. If the stream has more than @n@ elements the +-- trailing elements are silently dropped. Use 'fromPureStreamMin' (which +-- treats @n@ as a minimum and grows on overflow) or 'fromPureStream' when +-- the exact length is not known up front. +-- {-# INLINABLE fromPureStreamN #-} fromPureStreamN :: (MonadIO m, Unbox a) => Int -> Stream Identity a -> m (MutArray a) fromPureStreamN n = D.fold (createOf n) . D.generalizeInner +-- | Like 'fromPureStreamN' but @n@ is a minimum capacity hint, not a cap; the +-- buffer doubles on overflow so the full stream is always consumed. +{-# INLINABLE fromPureStreamMin #-} +fromPureStreamMin :: (MonadIO m, Unbox a) => + Int -> Stream Identity a -> m (MutArray a) +fromPureStreamMin n = D.fold (createMinOf n) . 
D.generalizeInner + -- | Convert a pure stream in Identity monad to a mutable array. {-# INLINABLE fromPureStream #-} fromPureStream :: (MonadIO m, Unbox a) => Stream Identity a -> m (MutArray a) diff --git a/src/Streamly/Internal/Data/SmallArray.hs b/src/Streamly/Internal/Data/SmallArray.hs index 8452f9bde7..549c984578 100644 --- a/src/Streamly/Internal/Data/SmallArray.hs +++ b/src/Streamly/Internal/Data/SmallArray.hs @@ -102,6 +102,9 @@ foldr f z arr = runIdentity $ D.foldr f z $ toStreamD arr -- | @createOf n@ folds a maximum of @n@ elements from the input stream to an -- 'SmallArray'. -- +-- /WARNING/: this truncates. If the input stream has more than @n@ elements +-- the trailing elements are silently dropped. +-- -- Since we are folding to a 'SmallArray' @n@ should be <= 128, for larger number -- of elements use an 'Array' from either "Streamly.Data.Array.Generic" or "Streamly.Data.Array.Foreign". {-# INLINE_NORMAL createOf #-} @@ -144,6 +147,9 @@ fromStreamDN limit str = do -- array may hold less than @n@ elements if the length of the list <= -- @n@. -- +-- /WARNING/: if the list has more than @n@ elements the trailing elements +-- are silently dropped. +-- -- It is recommended to use a value of @n@ <= 128. For larger sized -- arrays, use an 'Array' from "Streamly.Data.Array" or -- "Streamly.Data.Array.Foreign" @@ -159,6 +165,9 @@ instance NFData a => NFData (SmallArray a) where -- array is allocated to size @n@, if the stream terminates before @n@ -- elements then the array may hold less than @n@ elements. -- +-- /WARNING/: this truncates. If the stream yields more than @n@ elements +-- the trailing elements are silently dropped. +-- -- For optimal performance use this with @n@ <= 128. 
{-# INLINE fromStreamN #-} fromStreamN :: MonadIO m => Int -> Stream m a -> m (SmallArray a) diff --git a/src/Streamly/Internal/Unicode/Utf8.hs b/src/Streamly/Internal/Unicode/Utf8.hs index 1f9877a123..718ebb9171 100644 --- a/src/Streamly/Internal/Unicode/Utf8.hs +++ b/src/Streamly/Internal/Unicode/Utf8.hs @@ -34,8 +34,9 @@ import System.IO.Unsafe (unsafePerformIO) import qualified Streamly.Data.Fold as Fold import qualified Streamly.Data.Stream as Stream import qualified Streamly.Internal.Data.Array as Array - ( fromStreamN + ( fromPureStreamN , read + , rightSize ) import qualified Streamly.Internal.Unicode.Stream as Unicode @@ -58,9 +59,13 @@ toArray (Utf8 arr) = arr {-# INLINEABLE pack #-} pack :: String -> Utf8 pack s = + -- UTF-8 emits up to 4 bytes per char; allocate the worst case so the + -- encoded stream cannot overflow, then rightSize trims the slack. Utf8 - $ unsafePerformIO - $ Array.fromStreamN len $ Unicode.encodeUtf8' $ Stream.fromList s + $ Array.rightSize + $ Array.fromPureStreamN (4 * len) + $ Unicode.encodeUtf8' + $ Stream.fromList s where