From c7053548f78a2681ca45e5ac1f7a4db7185b4226 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Wed, 6 May 2026 09:42:55 +0530 Subject: [PATCH 01/17] Move FileSystem/Posix, Windows to Syscall --- bench-test-lib/src/BenchTestLib/DirIO.hs | 3 --- core/src/Streamly/Internal/FileSystem/DirIO.hs | 4 ++-- core/src/Streamly/Internal/FileSystem/FileIO.hs | 4 ++-- .../Internal/{FileSystem => Syscall}/Posix/Errno.hs | 4 ++-- .../Internal/{FileSystem => Syscall}/Posix/File.hsc | 4 ++-- .../Internal/{FileSystem => Syscall}/Posix/ReadDir.hsc | 8 ++++---- .../Internal/{FileSystem => Syscall}/Windows/File.hsc | 2 +- .../{FileSystem => Syscall}/Windows/ReadDir.hsc | 4 ++-- core/streamly-core.cabal | 10 +++++----- test/Streamly/Test/FileSystem/Handle.hs | 4 ++-- 10 files changed, 22 insertions(+), 25 deletions(-) rename core/src/Streamly/Internal/{FileSystem => Syscall}/Posix/Errno.hs (94%) rename core/src/Streamly/Internal/{FileSystem => Syscall}/Posix/File.hsc (98%) rename core/src/Streamly/Internal/{FileSystem => Syscall}/Posix/ReadDir.hsc (99%) rename core/src/Streamly/Internal/{FileSystem => Syscall}/Windows/File.hsc (99%) rename core/src/Streamly/Internal/{FileSystem => Syscall}/Windows/ReadDir.hsc (98%) diff --git a/bench-test-lib/src/BenchTestLib/DirIO.hs b/bench-test-lib/src/BenchTestLib/DirIO.hs index b4e0355e41..dd9e588903 100644 --- a/bench-test-lib/src/BenchTestLib/DirIO.hs +++ b/bench-test-lib/src/BenchTestLib/DirIO.hs @@ -60,9 +60,6 @@ import qualified Streamly.Data.Unfold as Unfold import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.FileSystem.DirIO as Dir import qualified Streamly.FileSystem.Path as Path -#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -import qualified Streamly.Internal.FileSystem.Posix.ReadDir as Dir -#endif -------------------------------------------------------------------------------- -- Helpers diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs index 04dd1a5b7b..9d4899e4b8 100644 --- a/core/src/Streamly/Internal/FileSystem/DirIO.hs +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -180,9 +180,9 @@ import Streamly.Internal.Data.Unfold.Type (Unfold(..)) import Streamly.Internal.FileSystem.Path (Path) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import qualified Streamly.Internal.Data.Fold as Fold -import Streamly.Internal.FileSystem.Windows.ReadDir (eitherReader, reader) +import Streamly.Internal.Syscall.Windows.ReadDir (eitherReader, reader) #else -import Streamly.Internal.FileSystem.Posix.ReadDir +import Streamly.Internal.Syscall.Posix.ReadDir ( readEitherChunks, eitherReader, reader) #endif import qualified Streamly.Internal.Data.Stream as S diff --git a/core/src/Streamly/Internal/FileSystem/FileIO.hs b/core/src/Streamly/Internal/FileSystem/FileIO.hs index 7b7db680d1..e5bd4cf8ce 100644 --- a/core/src/Streamly/Internal/FileSystem/FileIO.hs +++ b/core/src/Streamly/Internal/FileSystem/FileIO.hs @@ -96,9 +96,9 @@ import qualified Streamly.Internal.Data.Fold.Type as FL (Step(..), snoc, reduce) import qualified Streamly.Internal.FileSystem.Handle as FH #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -import qualified Streamly.Internal.FileSystem.Posix.File as File +import qualified Streamly.Internal.Syscall.Posix.File as File #else -import qualified Streamly.Internal.FileSystem.Windows.File as File +import qualified Streamly.Internal.Syscall.Windows.File as File #endif #include "inline.hs" diff --git a/core/src/Streamly/Internal/FileSystem/Posix/Errno.hs b/core/src/Streamly/Internal/Syscall/Posix/Errno.hs similarity index 94% rename from core/src/Streamly/Internal/FileSystem/Posix/Errno.hs rename to core/src/Streamly/Internal/Syscall/Posix/Errno.hs index b4c6ce5166..14d55e5ec5 100644 --- a/core/src/Streamly/Internal/FileSystem/Posix/Errno.hs +++ b/core/src/Streamly/Internal/Syscall/Posix/Errno.hs @@ -1,12 +1,12 @@ -- | --- Module : Streamly.Internal.FileSystem.Posix.Errno +-- Module : Streamly.Internal.Syscall.Posix.Errno -- Copyright : (c) 2024 Composewell Technologies -- -- License : BSD3 -- Maintainer : streamly@composewell.com -- Portability : GHC -module Streamly.Internal.FileSystem.Posix.Errno +module Streamly.Internal.Syscall.Posix.Errno ( #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) throwErrnoPath diff --git a/core/src/Streamly/Internal/FileSystem/Posix/File.hsc b/core/src/Streamly/Internal/Syscall/Posix/File.hsc similarity index 98% rename from core/src/Streamly/Internal/FileSystem/Posix/File.hsc rename to core/src/Streamly/Internal/Syscall/Posix/File.hsc index e42c5835c6..4399fc7302 100644 --- a/core/src/Streamly/Internal/FileSystem/Posix/File.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix/File.hsc @@ -1,4 +1,4 @@ -module Streamly.Internal.FileSystem.Posix.File +module Streamly.Internal.Syscall.Posix.File ( #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) @@ -85,7 +85,7 @@ import Foreign.C.Error (throwErrnoIfMinus1_) import Foreign.C.String (CString) import Foreign.C.Types (CInt(..)) import GHC.IO.Handle.FD (fdToHandle) -import Streamly.Internal.FileSystem.Posix.Errno (throwErrnoPathIfMinus1Retry) +import Streamly.Internal.Syscall.Posix.Errno (throwErrnoPathIfMinus1Retry) import Streamly.Internal.FileSystem.PosixPath (PosixPath) import System.IO (IOMode(..), Handle) import System.Posix.Types (Fd(..), CMode(..)) diff --git a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc similarity index 99% rename from core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc rename to core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc index fb68c9b38a..64e901a39c 100644 --- a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc @@ -1,12 +1,12 @@ -- | --- Module : Streamly.Internal.FileSystem.Posix.ReadDir +-- Module : Streamly.Internal.Syscall.Posix.ReadDir -- Copyright : (c) 2024 Composewell Technologies -- -- License : BSD3 -- Maintainer : streamly@composewell.com -- Portability : GHC -module Streamly.Internal.FileSystem.Posix.ReadDir +module Streamly.Internal.Syscall.Posix.ReadDir ( #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) readScanWith_ @@ -48,8 +48,8 @@ import Streamly.Internal.Data.Scanl (Scanl) import Streamly.Internal.Data.Stream (Stream(..), Step(..)) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) import Streamly.Internal.FileSystem.Path (Path) -import Streamly.Internal.FileSystem.Posix.Errno (throwErrnoPathIfNullRetry) -import Streamly.Internal.FileSystem.Posix.File +import Streamly.Internal.Syscall.Posix.Errno (throwErrnoPathIfNullRetry) +import Streamly.Internal.Syscall.Posix.File (defaultOpenFlags, openAt, close) import Streamly.Internal.FileSystem.PosixPath (PosixPath(..)) import System.Posix.Types (Fd(..)) diff --git a/core/src/Streamly/Internal/FileSystem/Windows/File.hsc b/core/src/Streamly/Internal/Syscall/Windows/File.hsc similarity index 99% rename from core/src/Streamly/Internal/FileSystem/Windows/File.hsc rename to core/src/Streamly/Internal/Syscall/Windows/File.hsc index 1c61e8ea8a..2a57887ca5 100644 --- a/core/src/Streamly/Internal/FileSystem/Windows/File.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/File.hsc @@ -1,6 +1,6 @@ -- XXX When introducing platform specifc API, see Posix/File.hsc and design in -- the same consistent way. -module Streamly.Internal.FileSystem.Windows.File +module Streamly.Internal.Syscall.Windows.File ( #if defined(mingw32_HOST_OS) || defined(__MINGW32__) -- * Handle based diff --git a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc similarity index 98% rename from core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc rename to core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc index c17cddac05..ef426328c3 100644 --- a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc @@ -1,12 +1,12 @@ -- | --- Module : Streamly.Internal.FileSystem.Windows.ReadDir +-- Module : Streamly.Internal.Syscall.Windows.ReadDir -- Copyright : (c) 2024 Composewell Technologies -- -- License : BSD3 -- Maintainer : streamly@composewell.com -- Portability : GHC -module Streamly.Internal.FileSystem.Windows.ReadDir +module Streamly.Internal.Syscall.Windows.ReadDir ( #if defined(mingw32_HOST_OS) || defined(__MINGW32__) DirStream diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index d7498c1f98..95cf544770 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -413,14 +413,14 @@ library , Streamly.Internal.FileSystem.PathIO , Streamly.Internal.FileSystem.Handle , Streamly.Internal.FileSystem.File.Common - , Streamly.Internal.FileSystem.Posix.Errno - , Streamly.Internal.FileSystem.Posix.File - , Streamly.Internal.FileSystem.Posix.ReadDir + , Streamly.Internal.Syscall.Posix.Errno + , Streamly.Internal.Syscall.Posix.File + , Streamly.Internal.Syscall.Posix.ReadDir , Streamly.Internal.Syscall.Common , Streamly.Internal.Syscall.Posix , Streamly.Internal.Syscall.Windows - , Streamly.Internal.FileSystem.Windows.ReadDir - , Streamly.Internal.FileSystem.Windows.File + , Streamly.Internal.Syscall.Windows.ReadDir + , Streamly.Internal.Syscall.Windows.File , Streamly.Internal.FileSystem.FileIO , Streamly.Internal.FileSystem.DirIO diff --git a/test/Streamly/Test/FileSystem/Handle.hs b/test/Streamly/Test/FileSystem/Handle.hs index 86c817f78b..24105225e5 100644 --- a/test/Streamly/Test/FileSystem/Handle.hs +++ b/test/Streamly/Test/FileSystem/Handle.hs @@ -24,9 +24,9 @@ import System.IO ) import System.IO.Temp (withSystemTempDirectory) #if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) -import Streamly.Internal.FileSystem.Posix.File (openFile, withFile) +import Streamly.Internal.Syscall.Posix.File (openFile, withFile) #else -import Streamly.Internal.FileSystem.Windows.File (openFile, withFile) +import Streamly.Internal.Syscall.Windows.File (openFile, withFile) #endif import Test.QuickCheck (Property, forAll, Gen, vectorOf, choose) import Test.QuickCheck.Monadic (monadicIO, assert, run) From fb6930d3c5493a73b572d480435655e8e274fd0e Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 02:22:02 +0530 Subject: [PATCH 02/17] Add readEitherByteChunks for Windows --- .../Internal/Syscall/Windows/ReadDir.hsc | 240 +++++++++++++++++- 1 file changed, 238 insertions(+), 2 deletions(-) diff --git a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc index ef426328c3..1221eead26 100644 --- a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc @@ -13,6 +13,7 @@ module Streamly.Internal.Syscall.Windows.ReadDir , openDirStream , closeDirStream , readDirStreamEither + , readEitherByteChunks , eitherReader , reader #endif @@ -27,15 +28,22 @@ import Control.Monad.Catch (MonadCatch) import Control.Monad.IO.Class (MonadIO(..)) import Data.Char (ord, isSpace) import Data.IORef (IORef, newIORef, readIORef, writeIORef) -import Foreign.C (CInt(..), CWchar(..), Errno(..), errnoToIOError, peekCWString) +import Foreign.C + ( CInt(..), CSize(..), CWchar(..), Errno(..) + , errnoToIOError, peekCWString + ) +import Fusion.Plugin.Types (Fuse(..)) import Numeric (showHex) +import Streamly.Internal.Data.Array (Array(..)) +import Streamly.Internal.Data.MutByteArray (MutByteArray) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) -import Streamly.Internal.Data.Stream (Step(..)) +import Streamly.Internal.Data.Stream (Stream(..), Step(..)) import Streamly.Internal.FileSystem.Path (Path) import Streamly.Internal.FileSystem.WindowsPath (WindowsPath(..)) import System.IO.Error (ioeSetErrorString) import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.MutByteArray as MutByteArray import qualified Streamly.Internal.Data.Unfold as UF (bracketIO) import qualified Streamly.Internal.FileSystem.WindowsPath as Path import qualified System.Win32 as Win32 (failWith) @@ -149,6 +157,7 @@ openDirStream p = do fp_finddata <- mallocForeignPtrBytes (# const sizeof(WIN32_FIND_DATAW) ) withForeignPtr fp_finddata $ \dataPtr -> do handle <- + -- XXX should it be asCWString, so we do not need to use castPtr Array.asCStringUnsafe (Path.toArray path) $ \pathPtr -> do -- XXX Use getLastError to distinguish the case when no -- matching file is found. See the doc of FindFirstFileW. @@ -270,4 +279,231 @@ eitherReader f = -- out to be a problem for small filenames we can use getdents64 to use -- chunked read to avoid the overhead. UF.bracketIO openDirStream closeDirStream (streamEitherReader f) + +------------------------------------------------------------------------------ +-- Chunked byte-buffered reads +------------------------------------------------------------------------------ + +foreign import ccall unsafe "string.h memcpy" c_memcpy + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) + +foreign import ccall unsafe "wchar.h wcslen" c_wcslen + :: Ptr CWchar -> IO CSize + +-- Split a list in half. +splitHalf :: [a] -> ([a], [a]) +splitHalf xxs = split xxs xxs + + where + + split (x:xs) (_:_:ys) = + let (f, s) = split xs ys + in (x:f, s) + split xs _ = ([], xs) + +{-# ANN type ChunkStreamByteState Fuse #-} +data ChunkStreamByteState = + ChunkStreamByteInit + | ChunkStreamByteStop + | ChunkStreamByteLoop + WindowsPath -- current dir path + [WindowsPath] -- remaining dirs + DirStream -- current dir stream + MutByteArray + Int + | ChunkStreamReallocBuf + (Ptr CWchar) -- pending item name + WindowsPath -- current dir path + [WindowsPath] -- remaining dirs + DirStream -- current dir stream + MutByteArray + Int + | ChunkStreamDrainBuf + MutByteArray + Int + +-- TODO: instead of unsafeJoin use appendCWString +-- +-- NOTE: Unlike posix on Windows the file attribute to determine whether it is +-- a directory or not is always available so we do not need the code to handle +-- the case when they are not available, on Posix we need to use stat +-- explicitly in that case. + +-- | This function may not traverse all the directories supplied and it may +-- traverse the directories recursively. Left contains those directories that +-- were not traversed by this function, these may be the directories that were +-- supplied as input as well as newly discovered directories during traversal. +-- To traverse the entire tree we have to iterate this function on the Left +-- output. +-- +-- Right is a buffer containing UTF-16LE encoded directories and files +-- separated by newlines, with the parent path joined to each child name by a +-- backslash. +-- +{-# INLINE readEitherByteChunks #-} +readEitherByteChunks :: MonadIO m => + (ReadOptions -> ReadOptions) -> + [WindowsPath] -> Stream m (Either [WindowsPath] (Array Word8)) +readEitherByteChunks _confMod alldirs = + Stream step ChunkStreamByteInit + + where + + bufSize = 32000 + + -- The output is UTF-16LE encoded. The format per entry is: + -- dirPath ++ '\\' ++ name ++ '\n', where each character occupies 2 bytes. + copyToBuf dstArr pos dirPath name = do + nameLen <- fmap ((* 2) . fromIntegral) (liftIO $ c_wcslen name) + MutByteArray.unsafeAsPtr dstArr (\ptr -> liftIO $ do + let WindowsPath (Array dirArr start end) = dirPath + dirLen = end - start + endDir = pos + dirLen + -- separator (2 bytes) + newline (2 bytes) + endPos = endDir + nameLen + 4 + sepOff = ptr `plusPtr` endDir + nameOff = sepOff `plusPtr` 2 + nlOff = nameOff `plusPtr` nameLen + if (endPos < bufSize) + then do + MutByteArray.unsafePutSlice dirArr start dstArr pos dirLen + -- '\\' as UTF-16LE: 0x5C 0x00 + poke sepOff (92 :: Word8) + poke (sepOff `plusPtr` 1) (0 :: Word8) + _ <- c_memcpy nameOff (castPtr name) (fromIntegral nameLen) + -- '\n' as UTF-16LE: 0x0A 0x00 + poke nlOff (10 :: Word8) + poke (nlOff `plusPtr` 1) (0 :: Word8) + return (Just endPos) + else return Nothing + ) + + -- Returns Just (dname, dattrs) on success, Nothing at end of stream. The + -- returned dname pointer is valid until the next call to readNextEntry on + -- the same DirStream. + readNextEntry (DirStream (h, ref, fdata)) = + withForeignPtr fdata $ \ptr -> do + firstTime <- readIORef ref + success <- + if firstTime + then writeIORef ref False >> return True + else c_FindNextFileW h ptr + if success + then do + let dname = #{ptr WIN32_FIND_DATAW, cFileName} ptr + dattrs :: #{type DWORD} <- + #{peek WIN32_FIND_DATAW, dwFileAttributes} ptr + return (Just (dname, dattrs)) + else do + err <- getLastError + if err == (# const ERROR_NO_MORE_FILES ) + then return Nothing + else Win32.failWith "findNextFile" err + + step _ ChunkStreamByteInit = do + mbarr <- liftIO $ MutByteArray.new' bufSize + case alldirs of + (x:xs) -> do + ds <- liftIO $ openDirStream x + return $ Skip $ ChunkStreamByteLoop x xs ds mbarr 0 + [] -> return Stop + + step _ ChunkStreamByteStop = return Stop + + step _ (ChunkStreamReallocBuf pending curdir xs ds mbarr pos) = do + mbarr1 <- liftIO $ MutByteArray.new' bufSize + r1 <- copyToBuf mbarr1 0 curdir pending + case r1 of + Just pos2 -> + return $ Yield (Right (Array mbarr 0 pos)) + -- When we come in this state we have emitted dirs + (ChunkStreamByteLoop curdir xs ds mbarr1 pos2) + Nothing -> error "Dirname too big for bufSize" + + step _ (ChunkStreamDrainBuf mbarr pos) = + if pos == 0 + then return Stop + else return $ Yield (Right (Array mbarr 0 pos)) ChunkStreamByteStop + + step _ (ChunkStreamByteLoop icurdir ixs ids mbarr ipos) = + goOuter icurdir ids ixs ipos + + where + + -- This is recursed only when we open the next dir. + -- Encapsulates curdir and ds as static arguments. + goOuter curdir ds = goInner + + where + + -- This is recursed each time we find a dir. + -- Encapsulates dirs as static argument. + goInner dirs = nextEntry + + where + + {-# INLINE nextEntry #-} + nextEntry pos = do + r <- liftIO $ readNextEntry ds + case r of + Just (dname, dattrs) -> + handleDentry pos dname dattrs + Nothing -> handleEnd pos + + handleEnd pos = do + -- XXX Exception safety + liftIO $ closeDirStream ds + openNextDir pos + + openNextDir pos = + case dirs of + (x:xs) -> do + ds1 <- liftIO $ openDirStream x + goOuter x ds1 xs pos + [] -> + if pos == 0 + then return Stop + else return + $ Yield + (Right (Array mbarr 0 pos)) + ChunkStreamByteStop + + splitAndRealloc pos dname xs1 = + case xs1 of + [] -> + return $ Skip + (ChunkStreamReallocBuf dname curdir + [] ds mbarr pos) + _ -> do + let (h,t) = splitHalf xs1 + return $ Yield (Left t) + (ChunkStreamReallocBuf dname curdir + h ds mbarr pos) + + {-# INLINE handleFileEnt #-} + handleFileEnt pos dname = do + r <- copyToBuf mbarr pos curdir dname + case r of + Just pos1 -> nextEntry pos1 + Nothing -> splitAndRealloc pos dname dirs + + {-# INLINE handleDirEnt #-} + handleDirEnt pos dname = do + arr <- liftIO $ Array.fromW16CString dname + let path = + Path.unsafeJoin curdir (Path.unsafeFromArray arr) + dirs1 = path : dirs + r <- copyToBuf mbarr pos curdir dname + case r of + Just pos1 -> goInner dirs1 pos1 + Nothing -> splitAndRealloc pos dname dirs1 + + handleDentry pos dname dattrs = + if (dattrs .&. (#const FILE_ATTRIBUTE_DIRECTORY) /= 0) + then do + isMeta <- liftIO $ isMetaDir dname + if isMeta + then nextEntry pos + else handleDirEnt pos dname + else handleFileEnt pos dname #endif From 18dbbc361d57fba0a76fac9e5a2251f17d66b873 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 02:25:58 +0530 Subject: [PATCH 03/17] Export readEitherByteChunks from DirIO module --- core/src/Streamly/Internal/FileSystem/DirIO.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs index 9d4899e4b8..e6700da1cb 100644 --- a/core/src/Streamly/Internal/FileSystem/DirIO.hs +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -128,6 +128,7 @@ module Streamly.Internal.FileSystem.DirIO , readEither , readEitherPaths , readEitherChunks + , readEitherByteChunks -- We can implement this in terms of readAttrsRecursive without losing -- perf. @@ -180,10 +181,11 @@ import Streamly.Internal.Data.Unfold.Type (Unfold(..)) import Streamly.Internal.FileSystem.Path (Path) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import qualified Streamly.Internal.Data.Fold as Fold -import Streamly.Internal.Syscall.Windows.ReadDir (eitherReader, reader) +import Streamly.Internal.Syscall.Windows.ReadDir + (readEitherByteChunks, eitherReader, reader) #else import Streamly.Internal.Syscall.Posix.ReadDir - ( readEitherChunks, eitherReader, reader) + ( readEitherChunks, readEitherByteChunks, eitherReader, reader) #endif import qualified Streamly.Internal.Data.Stream as S import qualified Streamly.Data.Unfold as UF From 5f775f330d869206211af6b7e092a433cdab77c2 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 02:37:42 +0530 Subject: [PATCH 04/17] Add readEitherChunks to Windows Readdir module --- .../Internal/Syscall/Windows/ReadDir.hsc | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc index 1221eead26..cde8f0a0ef 100644 --- a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc @@ -13,6 +13,7 @@ module Streamly.Internal.Syscall.Windows.ReadDir , openDirStream , closeDirStream , readDirStreamEither + , readEitherChunks , readEitherByteChunks , eitherReader , reader @@ -280,6 +281,113 @@ eitherReader f = -- chunked read to avoid the overhead. UF.bracketIO openDirStream closeDirStream (streamEitherReader f) +------------------------------------------------------------------------------ +-- Chunked path-list reads +------------------------------------------------------------------------------ + +{-# ANN type ChunkStreamState Fuse #-} +data ChunkStreamState = + ChunkStreamInit [WindowsPath] [WindowsPath] Int [WindowsPath] Int + | ChunkStreamLoop + WindowsPath -- current dir path + [WindowsPath] -- remaining dirs + DirStream -- current dir stream + [WindowsPath] -- dirs buffered + Int -- dir count + [WindowsPath] -- files buffered + Int -- file count + +-- | Like 'readEitherByteChunks' but yields lists of 'WindowsPath' instead of +-- byte buffers. Directories are emitted as 'Left' and files as 'Right'. Meta +-- entries (\".\" and \"..\") are filtered out. +{-# INLINE readEitherChunks #-} +readEitherChunks + :: MonadIO m + => (ReadOptions -> ReadOptions) + -> [WindowsPath] -> Stream m (Either [WindowsPath] [WindowsPath]) +readEitherChunks _confMod alldirs = + Stream step (ChunkStreamInit alldirs [] 0 [] 0) + + where + + -- We want to keep the dir batching as low as possible for better + -- concurrency esp when the number of dirs is low. + dirMax = 4 + fileMax = 1000 + + -- Returns Just (dname, dattrs) on success, Nothing at end of stream. + readNextEntry (DirStream (h, ref, fdata)) = + withForeignPtr fdata $ \ptr -> do + firstTime <- readIORef ref + success <- + if firstTime + then writeIORef ref False >> return True + else c_FindNextFileW h ptr + if success + then do + let dname = #{ptr WIN32_FIND_DATAW, cFileName} ptr + dattrs :: #{type DWORD} <- + #{peek WIN32_FIND_DATAW, dwFileAttributes} ptr + return (Just (dname, dattrs)) + else do + err <- getLastError + if err == (# const ERROR_NO_MORE_FILES ) + then return Nothing + else Win32.failWith "findNextFile" err + + step _ (ChunkStreamInit (x:xs) dirs ndirs files nfiles) = do + ds <- liftIO $ openDirStream x + return $ Skip (ChunkStreamLoop x xs ds dirs ndirs files nfiles) + + step _ (ChunkStreamInit [] [] _ [] _) = + return Stop + + step _ (ChunkStreamInit [] [] _ files _) = + return $ Yield (Right files) (ChunkStreamInit [] [] 0 [] 0) + + step _ (ChunkStreamInit [] dirs _ files _) = + return $ Yield (Left dirs) (ChunkStreamInit [] [] 0 files 0) + + step _ st@(ChunkStreamLoop curdir xs ds dirs ndirs files nfiles) = do + r <- liftIO $ readNextEntry ds + case r of + Just (dname, dattrs) -> + if (dattrs .&. (#const FILE_ATTRIBUTE_DIRECTORY) /= 0) + then do + isMeta <- liftIO $ isMetaDir dname + if isMeta + then return $ Skip st + else do + arr <- liftIO $ Array.fromW16CString dname + let path = + Path.unsafeJoin curdir + (Path.unsafeFromArray arr) + dirs1 = path : dirs + ndirs1 = ndirs + 1 + if ndirs1 >= dirMax + then return $ Yield (Left dirs1) + (ChunkStreamLoop curdir xs ds [] 0 files nfiles) + else return $ Skip + (ChunkStreamLoop + curdir xs ds dirs1 ndirs1 files nfiles) + else do + arr <- liftIO $ Array.fromW16CString dname + let path = + Path.unsafeJoin curdir + (Path.unsafeFromArray arr) + files1 = path : files + nfiles1 = nfiles + 1 + if nfiles1 >= fileMax + then return $ Yield (Right files1) + (ChunkStreamLoop curdir xs ds dirs ndirs [] 0) + else return $ Skip + (ChunkStreamLoop + curdir xs ds dirs ndirs files1 nfiles1) + Nothing -> do + -- XXX Exception safety + liftIO $ closeDirStream ds + return $ Skip (ChunkStreamInit xs dirs ndirs files nfiles) + ------------------------------------------------------------------------------ -- Chunked byte-buffered reads ------------------------------------------------------------------------------ From 59f32a10903fe4a9afd5f8c8e887f0e191e76df3 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 03:00:50 +0530 Subject: [PATCH 05/17] Simplify imports, export readEitherChunksPortable --- .../src/Streamly/Internal/FileSystem/DirIO.hs | 46 ++++++++----------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs index e6700da1cb..a80dbb7551 100644 --- a/core/src/Streamly/Internal/FileSystem/DirIO.hs +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -127,8 +127,9 @@ module Streamly.Internal.FileSystem.DirIO , readDirs , readEither , readEitherPaths - , readEitherChunks - , readEitherByteChunks + , OS.readEitherChunks + , OS.readEitherByteChunks + , readEitherChunksPortable -- We can implement this in terms of readAttrsRecursive without losing -- perf. @@ -138,10 +139,10 @@ module Streamly.Internal.FileSystem.DirIO -- * Unfolds -- | Use the more convenient stream APIs instead of unfolds where possible. - , reader + , OS.reader , fileReader , dirReader - , eitherReader + , OS.eitherReader , eitherReaderPaths {- @@ -179,13 +180,11 @@ import Data.Either (isRight, isLeft, fromLeft, fromRight) import Streamly.Data.Stream (Stream) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) import Streamly.Internal.FileSystem.Path (Path) -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) import qualified Streamly.Internal.Data.Fold as Fold -import Streamly.Internal.Syscall.Windows.ReadDir - (readEitherByteChunks, eitherReader, reader) +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +import qualified Streamly.Internal.Syscall.Windows.ReadDir as OS #else -import Streamly.Internal.Syscall.Posix.ReadDir - ( readEitherChunks, readEitherByteChunks, eitherReader, reader) +import qualified Streamly.Internal.Syscall.Posix.ReadDir as OS #endif import qualified Streamly.Internal.Data.Stream as S import qualified Streamly.Data.Unfold as UF @@ -339,7 +338,7 @@ eitherReaderPaths ::(MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> eitherReaderPaths f = let () = Path.join in fmap (\(dir, x) -> bimap (dir ) (dir ) x) - $ UF.carry (eitherReader f) + $ UF.carry (OS.eitherReader f) -- -- | Read files only. @@ -349,7 +348,7 @@ eitherReaderPaths f = {-# INLINE fileReader #-} fileReader :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> Unfold m Path Path -fileReader f = fmap (fromRight undefined) $ UF.filter isRight (eitherReader f) +fileReader f = fmap (fromRight undefined) $ UF.filter isRight (OS.eitherReader f) -- | Read directories only. Filter out "." and ".." entries. -- @@ -358,7 +357,7 @@ fileReader f = fmap (fromRight undefined) $ UF.filter isRight (eitherReader f) {-# INLINE dirReader #-} dirReader :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> Unfold m Path Path -dirReader f = fmap (fromLeft undefined) $ UF.filter isLeft (eitherReader f) +dirReader f = fmap (fromLeft undefined) $ UF.filter isLeft (OS.eitherReader f) -- | Raw read of a directory. -- @@ -366,7 +365,7 @@ dirReader f = fmap (fromLeft undefined) $ UF.filter isLeft (eitherReader f) {-# INLINE read #-} read :: (MonadIO m, MonadCatch m) => Path -> Stream m Path -read = S.unfold reader +read = S.unfold OS.reader -- | Read directories as Left and files as Right. Filter out "." and ".." -- entries. The output contains the names of the directories and files. @@ -375,7 +374,7 @@ read = S.unfold reader {-# INLINE readEither #-} readEither :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> Path -> Stream m (Either Path Path) -readEither f = S.unfold (eitherReader f) +readEither f = S.unfold (OS.eitherReader f) -- | Like 'readEither' but prefix the names of the files and directories with -- the supplied directory path. @@ -386,20 +385,12 @@ readEitherPaths f dir = let () = Path.join in fmap (bimap (dir ) (dir )) $ readEither f dir -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) --- XXX Implement a custom version of readEitherChunks (like for Posix) for --- windows as well. Also implement readEitherByteChunks. --- --- XXX For a fast custom implementation of traversal, the Right could be the --- final array chunk including all files and dirs to be written to IO. The Left --- could be list of dirs to be traversed. --- --- This is a generic (but slower?) version of readEitherChunks using --- eitherReaderPaths. -{-# INLINE readEitherChunks #-} -readEitherChunks :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> +-- | A generic (but slower) version of 'readEitherChunks' using +-- 'eitherReaderPaths'. +{-# INLINE readEitherChunksPortable #-} +readEitherChunksPortable :: (MonadIO m, MonadCatch m) => (ReadOptions -> ReadOptions) -> [Path] -> Stream m (Either [Path] [Path]) -readEitherChunks f dirs = +readEitherChunksPortable f dirs = -- XXX Need to use a take to limit the group size. There will be separate -- limits for dir and files groups. S.groupsWhile grouper collector @@ -424,7 +415,6 @@ readEitherChunks f dirs = Right _ -> Left [x1] -- initial _ -> either (\xs -> Left (x1:xs)) Right b Right x1 -> fmap (x1:) b -#endif -- | Read files only. -- From f41f8bba6a5144f36c95109a282b53e498e6e694 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 06:07:08 +0530 Subject: [PATCH 06/17] Generalize appendPtrN to a polymorphic type --- .../Streamly/Internal/Data/MutArray/Type.hs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index fb58bf31bc..6ae87d4f65 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -3263,7 +3263,7 @@ unsafeSplice dst src = do (arrContents src) startSrc (arrContents dst) endDst srcLen return $ dst {arrEnd = endDst + srcLen} --- | Append specified number of bytes from a given pointer to the MutArray. +-- | Append specified number of Ptr items from a given pointer to the MutArray. -- -- /Unsafe:/ -- @@ -3274,20 +3274,22 @@ unsafeSplice dst src = do -- 3. the pointer passed is valid up to the given length. -- {-# INLINE unsafeAppendPtrN #-} -unsafeAppendPtrN :: MonadIO m => - MutArray Word8 -> Ptr Word8 -> Int -> m (MutArray Word8) -unsafeAppendPtrN arr ptr ptrLen = do - let newEnd = arrEnd arr + ptrLen +unsafeAppendPtrN :: forall m a. (MonadIO m, Unbox a) => + MutArray a -> Ptr a -> Int -> m (MutArray a) +unsafeAppendPtrN arr ptr count = do + let byteCount = count * SIZE_OF(a) + let newEnd = arrEnd arr + byteCount assertM(newEnd <= arrBound arr) - Unboxed.unsafePutPtrN ptr (arrContents arr) (arrEnd arr) ptrLen + Unboxed.unsafePutPtrN + (castPtr ptr) (arrContents arr) (arrEnd arr) byteCount return $ arr {arrEnd = newEnd} {-# INLINE appendPtrN #-} -appendPtrN :: MonadIO m => - MutArray Word8 -> Ptr Word8 -> Int -> m (MutArray Word8) -appendPtrN arr ptr ptrLen = do - arr1 <- growBy ptrLen arr - unsafeAppendPtrN arr1 ptr ptrLen +appendPtrN :: (Unbox a, MonadIO m) => + MutArray a -> Ptr a -> Int -> m (MutArray a) +appendPtrN arr ptr count = do + arr1 <- growBy count arr + unsafeAppendPtrN arr1 ptr count -- | @spliceWith sizer dst src@ mutates @dst@ to append @src@. If there is no -- reserved space available in @dst@ it is reallocated to a size determined by From 4200edfde09e4e06f1193df3d192a193b032f9d3 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 07:49:53 +0530 Subject: [PATCH 07/17] Tighten the signatures of asCString and asCWString Add asNullterminatedPtr to Array --- core/src/Streamly/Internal/Data/Array.hs | 59 ++++++++++++++----- .../Streamly/Internal/Data/MutArray/Type.hs | 41 ++++++------- .../Internal/FileSystem/Event/Linux.hs | 8 +-- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/core/src/Streamly/Internal/Data/Array.hs b/core/src/Streamly/Internal/Data/Array.hs index 44ae6d8f3b..3b9a012159 100644 --- a/core/src/Streamly/Internal/Data/Array.hs +++ b/core/src/Streamly/Internal/Data/Array.hs @@ -43,8 +43,9 @@ module Streamly.Internal.Data.Array , cast , asBytes , unsafeCast - , asCStringUnsafe -- XXX asCString - , asCWString + , asCString -- XXX can remove + , asCWString -- XXX can remove + , asNullTerminatedPtr -- * Subarrays -- , sliceOffLen @@ -107,6 +108,7 @@ module Streamly.Internal.Data.Array , splitOn , fold , foldBreakChunksK + , asCStringUnsafe ) where @@ -121,7 +123,9 @@ import Control.Monad.IO.Class (MonadIO(..)) import Data.Functor.Identity (Identity(..)) import Data.Proxy (Proxy(..)) import Data.Word (Word8) +import Foreign (Ptr, castPtr) import Foreign.C.String (CString, CWString) +import Foreign.C.Types (CWchar) import GHC.Types (SPEC(..)) import Streamly.Internal.Data.Unbox (Unbox(..)) import Prelude hiding (length, null, last, map, (!!), read, concat) @@ -396,12 +400,20 @@ streamTransform f arr = -- Casts ------------------------------------------------------------------------------- --- | Cast an array having elements of type @a@ into an array having elements of --- type @b@. The array size must be a multiple of the size of type @b@ --- otherwise accessing the last element of the array may result into a crash or --- a random value. +-- | Unsafely reinterpret an array of @a@ as an array of @b@. -- --- /Pre-release/ +-- The 'byteLength' of the array must be a multiple of the size of @b@. +-- Additionally, the start offset of the array must satisfy the alignment +-- requirements of @b@ whenever the target platform or element access +-- operations require aligned memory accesses. +-- +-- Violating these requirements results in undefined behavior, including +-- possible crashes or invalid values. +-- +-- If @a@ and @b@ have the same runtime representation, prefer using +-- 'coerce', which is safe: +-- +-- >> unsafeCast = coerce -- unsafeCast, castUnsafe :: Array a -> Array b unsafeCast (Array contents start end) = @@ -414,11 +426,14 @@ RENAME(castUnsafe,unsafeCast) asBytes :: Array a -> Array Word8 asBytes = unsafeCast --- | Cast an array having elements of type @a@ into an array having elements of --- type @b@. The length of the array should be a multiple of the size of the --- target element otherwise 'Nothing' is returned. +-- | Attempt to reinterpret an array as containing elements of type @b@. -- +-- Returns 'Nothing' if the byte length of the array is not a multiple +-- of the size of @b@. -- +-- This function does not validate alignment requirements for @b@. +-- If stricter alignment is required, copy the array into a properly +-- aligned buffer before casting. cast :: forall a b. (Unbox b) => Array a -> Maybe (Array b) cast arr = let len = byteLength arr @@ -436,11 +451,23 @@ cast arr = -- -- /Pre-release/ -- +{-# DEPRECATED asCStringUnsafe "Please use asCString instead." #-} asCStringUnsafe :: Array a -> (CString -> IO b) -> IO b -asCStringUnsafe arr = MA.asCString (unsafeThaw arr) +asCStringUnsafe arr act = + MA.asCString (unsafeThaw (unsafeCast arr)) $ \ptr -> act (castPtr ptr) --- | Convert an array of any element type into a null terminated CWString Ptr. --- The array is copied to pinned memory. +-- | Use an array of CChar as a null terminated CString Ptr in pinned memory. +-- +-- /Unsafe/ +-- +-- /O(n) Time: (creates a copy of the array)/ +-- +-- /Pre-release/ +-- +asCString :: Array Word8 -> (Ptr Word8 -> IO b) -> IO b +asCString arr = MA.asCString (unsafeThaw arr) + +-- | Use an array of CWchar as a null terminated CWString Ptr in pinned memory. -- -- /Unsafe/ -- @@ -448,9 +475,13 @@ asCStringUnsafe arr = MA.asCString (unsafeThaw arr) -- -- /Pre-release/ -- -asCWString :: Array a -> (CWString -> IO b) -> IO b +asCWString :: Array CWchar -> (CWString -> IO b) -> IO b asCWString arr = MA.asCWString (unsafeThaw arr) +{-# INLINE asNullTerminatedPtr #-} +asNullTerminatedPtr :: (Unbox a, Num a) => Array a -> (Ptr a -> IO b) -> IO b +asNullTerminatedPtr arr = MA.asNullTerminatedPtr (unsafeThaw arr) + ------------------------------------------------------------------------------- -- Folds ------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 6ae87d4f65..6b83e22156 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -46,8 +46,9 @@ module Streamly.Internal.Data.MutArray.Type , unsafeCast , asBytes , unsafeAsPtr -- XXX asPtr - , asCString - , asCWString + , asCString -- XXX can remove + , asCWString -- XXX can remove + , asNullTerminatedPtr -- ** Construction , empty @@ -519,8 +520,8 @@ import Data.Char (ord) import Data.Functor.Identity (Identity(..)) import Data.Proxy (Proxy(..)) import Data.Word (Word8, Word16) -import Foreign.C.String (CString, CWString) -import Foreign.C.Types (CSize(..), CChar, CWchar) +import Foreign.C.String (CWString) +import Foreign.C.Types (CSize(..), CWchar) import Foreign.Ptr (plusPtr, castPtr) import Streamly.Internal.Data.MutByteArray.Type ( MutByteArray(..) @@ -3664,27 +3665,27 @@ unsafeCreateWithPtr' cap pop = do ++ "length = " ++ show len ++ ", " ++ "capacity = " ++ show cap -asCString :: MutArray a -> (CString -> IO b) -> IO b -asCString arr act = do +{-# INLINE asNullTerminatedPtr #-} +asNullTerminatedPtr :: forall a b. (Unbox a, Num a) => + MutArray a -> (Ptr a -> IO b) -> IO b +asNullTerminatedPtr arr act = do let pinned = isPinned arr - req = byteLength arr + SIZE_OF(CChar) + req = byteLength arr + SIZE_OF(a) arr1 <- if byteCapacity arr < req || not pinned - then reallocExplicitAs Pinned 1 req arr + then reallocExplicitAs Pinned (SIZE_OF(a)) req arr else return arr - arr2 :: MutArray CChar <- snocUnsafe (unsafeCast arr1) (0 :: CChar) - unsafeAsPtr arr2 $ \ptr _ -> act (castPtr ptr) + arr2 <- snocUnsafe arr1 (0 :: a) + unsafeAsPtr arr2 $ \ptr _ -> act ptr -asCWString :: MutArray a -> (CWString -> IO b) -> IO b -asCWString arr act = do - let pinned = isPinned arr - req = byteLength arr + SIZE_OF(CWchar) - arr1 <- - if byteCapacity arr < req || not pinned - then reallocExplicitAs Pinned 1 req arr - else return arr - arr2 :: MutArray CWchar <- snocUnsafe (unsafeCast arr1) (0 :: CWchar) - unsafeAsPtr arr2 $ \ptr _ -> act (castPtr ptr) +-- | Provides a pinned, null terminated CString pointer (Ptr Word8). +asCString :: MutArray Word8 -> (Ptr Word8 -> IO b) -> IO b +asCString = asNullTerminatedPtr + +-- | Provides a pinned, null terminated CWString pointer (Ptr CWchar). +-- Note CWchar is 16-bit wide on Windows and 32-bit wide on Posix. +asCWString :: MutArray CWchar -> (CWString -> IO b) -> IO b +asCWString = asNullTerminatedPtr ------------------------------------------------------------------------------- -- Equality diff --git a/src/Streamly/Internal/FileSystem/Event/Linux.hs b/src/Streamly/Internal/FileSystem/Event/Linux.hs index 3b0e417b7a..b77dee7fbe 100644 --- a/src/Streamly/Internal/FileSystem/Event/Linux.hs +++ b/src/Streamly/Internal/FileSystem/Event/Linux.hs @@ -167,7 +167,7 @@ import Data.Word (Word8, Word32) import Foreign.C.Error (throwErrnoIfMinus1) import Foreign.C.String (CString) import Foreign.C.Types (CInt(..), CUInt(..)) -import Foreign.Ptr (Ptr) +import Foreign.Ptr (Ptr, castPtr) import Foreign.Storable (peek, peekByteOff, sizeOf) import GHC.IO.Device (IODeviceType(Stream)) import GHC.IO.FD (fdFD, FD(..)) @@ -191,7 +191,7 @@ import qualified Streamly.Unicode.Stream as U import qualified Streamly.Internal.FileSystem.Path as Path import qualified Streamly.Internal.Data.Array as A - ( asCStringUnsafe, unsafePinnedAsPtr + ( asNullTerminatedPtr, unsafePinnedAsPtr , unsafeSliceOffLen, read ) import qualified Streamly.Internal.FileSystem.DirIO as Dir @@ -715,9 +715,9 @@ addToWatch cfg@Config{..} watch0@(Watch handle wdMap) root0 path0 = do -- -- XXX The file may have even got deleted and then recreated which we will -- never get to know, document this. - wd <- A.asCStringUnsafe absPath $ \pathPtr -> + wd <- A.asNullTerminatedPtr absPath $ \pathPtr -> throwErrnoIfMinus1 ("addToWatch: " ++ utf8ToString absPath) $ - c_inotify_add_watch (fdFD fd) pathPtr (CUInt createFlags) + c_inotify_add_watch (fdFD fd) (castPtr pathPtr) (CUInt createFlags) -- We add the parent first so that we start getting events for any new -- creates and add the new subdirectories on creates while we are adding From 59a5c1f4e1f93a400281709843b88f7d4c04b60f Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 04:11:13 +0530 Subject: [PATCH 08/17] Rename joinCStr to appendCString --- core/src/Streamly/Internal/FileSystem/PosixPath.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 08e3cba0e4..7b88430f24 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -152,8 +152,8 @@ module Streamly.Internal.FileSystem.OS_PATH_TYPE -- , concat , unsafeJoin #ifndef IS_WINDOWS - , joinCStr - , joinCStr' + , appendCString + , appendCString' #endif , join , joinDir @@ -990,17 +990,17 @@ joinDir -- but always inserts a separator between the two paths even if the first path -- has a trailing separator or second path has a leading separator. -- -joinCStr :: OS_PATH_TYPE -> CString -> IO OS_PATH_TYPE -joinCStr (OS_PATH a) str = +appendCString :: OS_PATH_TYPE -> CString -> IO OS_PATH_TYPE +appendCString (OS_PATH a) str = fmap OS_PATH $ Common.appendCString Common.OS_NAME a str --- | Like 'joinCStr' but creates a pinned path. +-- | Like 'appendCString' but creates a pinned path. -- -joinCStr' :: +appendCString' :: OS_PATH_TYPE -> CString -> IO OS_PATH_TYPE -joinCStr' +appendCString' (OS_PATH a) str = fmap OS_PATH $ Common.appendCString' From 115a9c8c15a77847eb83e9abe84f1f59e883fe45 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 04:14:21 +0530 Subject: [PATCH 09/17] Add INLINE pragma to appendCString --- core/src/Streamly/Internal/FileSystem/PosixPath.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 7b88430f24..d72dbdd778 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -990,6 +990,7 @@ joinDir -- but always inserts a separator between the two paths even if the first path -- has a trailing separator or second path has a leading separator. -- +{-# INLINE appendCString #-} appendCString :: OS_PATH_TYPE -> CString -> IO OS_PATH_TYPE appendCString (OS_PATH a) str = fmap OS_PATH @@ -998,6 +999,7 @@ appendCString (OS_PATH a) str = -- | Like 'appendCString' but creates a pinned path. -- +{-# INLINE appendCString' #-} appendCString' :: OS_PATH_TYPE -> CString -> IO OS_PATH_TYPE appendCString' From e67cc8430aa6e9a9079cb049d3b4ad108bd5f150 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 03:57:14 +0530 Subject: [PATCH 10/17] Add appendCWString path operation for Windows --- .../Internal/FileSystem/Path/Common.hs | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index a74bff3321..c47f14d034 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -59,6 +59,8 @@ module Streamly.Internal.FileSystem.Path.Common , unsafeAppend , appendCString , appendCString' + , appendCWString + , appendCWString' , unsafeJoinPaths -- , joinRoot -- XXX append should be enough, see joinRootBody @@ -126,7 +128,7 @@ import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) import Data.Word (Word8, Word16) import Foreign (castPtr) -import Foreign.C (CString, CSize(..)) +import Foreign.C (CString, CSize(..), CWchar, CWString) import GHC.Base (unsafeChr, Addr#) import GHC.Ptr (Ptr(..)) import Language.Haskell.TH (Q, Exp) @@ -1506,36 +1508,52 @@ mkQ f = -- XXX Scan this entire module for pushing array operations to MutArray or -- MutByteArray modules. --- See also cstringLength# in GHC.CString in ghc-prim -foreign import ccall unsafe "string.h strlen" c_strlen_pinned - :: Addr# -> IO CSize - {-# INLINE appendCStringWith #-} -appendCStringWith :: - (Int -> IO (MutArray Word8)) +appendCStringWith :: (Unbox a, Integral a) => + (Int -> IO (MutArray a)) + -> (Addr# -> IO CSize) -> OS - -> Array Word8 - -> CString - -> IO (Array Word8) -appendCStringWith create os a b@(Ptr addrB#) = do - let lenA = Array.length a - lenB <- fmap fromIntegral $ c_strlen_pinned addrB# - assertM(lenA /= 0 && lenB /= 0) - let len = lenA + 1 + lenB - arr <- create len - arr1 <- MutArray.unsafeSplice arr (Array.unsafeThaw a) + -> Array a + -> Ptr a + -> IO (Array a) +appendCStringWith create strlen os origArr origStr@(Ptr strAddr#) = do + let countArr = Array.length origArr + countStr <- fmap fromIntegral $ strlen strAddr# + assertM(countArr /= 0 && countStr /= 0) + let count = countArr + 1 + countStr + arr <- create count + arr1 <- MutArray.unsafeSplice arr (Array.unsafeThaw origArr) arr2 <- MutArray.unsafeSnoc arr1 (charToWord (primarySeparator os)) - arr3 :: MutArray.MutArray Word8 <- - MutArray.unsafeAppendPtrN arr2 (castPtr b) lenB + arr3 <- MutArray.unsafeAppendPtrN arr2 origStr countStr return (Array.unsafeFreeze arr3) +-- See also cstringLength# in GHC.CString in ghc-prim +foreign import ccall unsafe "string.h strlen" c_strlen_pinned + :: Addr# -> IO CSize + {-# INLINE appendCString #-} appendCString :: OS -> Array Word8 -> CString -> IO (Array Word8) -appendCString = appendCStringWith MutArray.emptyOf +appendCString os arr cstr = + appendCStringWith MutArray.emptyOf c_strlen_pinned os arr (castPtr cstr) {-# INLINE appendCString' #-} appendCString' :: OS -> Array Word8 -> CString -> IO (Array Word8) -appendCString' = appendCStringWith MutArray.emptyOf' +appendCString' os arr cstr = + appendCStringWith MutArray.emptyOf' c_strlen_pinned os arr (castPtr cstr) + +foreign import ccall unsafe "wchar.h wcslen" c_wcslen_pinned + :: Addr# -> IO CSize + +-- | NOTE: CWchar is 16-bit wide on Windows and 32-bit wide on Posix. wcslen is +-- available on both Posix and Windows and counts accordingly in units of +-- 2-bytes or 4-bytes. +{-# INLINE appendCWString #-} +appendCWString :: OS -> Array CWchar -> CWString -> IO (Array CWchar) +appendCWString = appendCStringWith MutArray.emptyOf c_wcslen_pinned + +{-# INLINE appendCWString' #-} +appendCWString' :: OS -> Array CWchar -> CWString -> IO (Array CWchar) +appendCWString' = appendCStringWith MutArray.emptyOf' c_wcslen_pinned {-# INLINE doAppend #-} doAppend :: (Unbox a, Integral a) => OS -> Array a -> Array a -> Array a From 28ee08e58eb4472082e731ff4342a5b084653de7 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 04:16:57 +0530 Subject: [PATCH 11/17] Use appendCString from Path module --- .../Streamly/Internal/Syscall/Posix/ReadDir.hsc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc index 64e901a39c..5db85e21fe 100644 --- a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc @@ -57,7 +57,6 @@ import System.Posix.Types (Fd(..)) import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.MutByteArray as MutByteArray import qualified Streamly.Internal.Data.Unfold as UF (bracketIO) -import qualified Streamly.Internal.FileSystem.Path.Common as PathC import qualified Streamly.Internal.FileSystem.PosixPath as Path import Streamly.Internal.FileSystem.DirOptions @@ -214,7 +213,7 @@ statEntryType statEntryType conf parent dname = do -- XXX We can create a pinned array right here since the next call pins -- it anyway. - path <- appendCString parent dname + path <- Path.appendCString parent dname Array.asCStringUnsafe (Path.toArray path) $ \cStr -> do res <- stat (_followSymlinks conf) cStr case res of @@ -372,12 +371,6 @@ eitherReader confMod = -- chunked read to avoid the overhead. UF.bracketIO before after (streamEitherReader confMod) -{-# INLINE appendCString #-} -appendCString :: PosixPath -> CString -> IO PosixPath -appendCString (PosixPath a) b = do - arr <- PathC.appendCString PathC.Posix a b - pure $ PosixPath arr - {-# ANN type ChunkStreamState Fuse #-} data ChunkStreamState = ChunkStreamInit [PosixPath] [PosixPath] Int [PosixPath] Int @@ -439,7 +432,7 @@ readEitherChunks confMod alldirs = etype <- liftIO $ getEntryType conf curdir dname dtype case etype of EntryIsDir -> do - path <- liftIO $ appendCString curdir dname + path <- liftIO $ Path.appendCString curdir dname let dirs1 = path : dirs ndirs1 = ndirs + 1 in if ndirs1 >= dirMax @@ -448,7 +441,7 @@ readEitherChunks confMod alldirs = else return $ Skip (ChunkStreamLoop curdir xs dirp dirs1 ndirs1 files nfiles) EntryIsNotDir -> do - path <- liftIO $ appendCString curdir dname + path <- liftIO $ Path.appendCString curdir dname let files1 = path : files nfiles1 = nfiles + 1 in if nfiles1 >= fileMax @@ -690,7 +683,7 @@ readEitherByteChunks confMod alldirs = {-# INLINE handleDirEnt #-} handleDirEnt pos dname = do - path <- liftIO $ appendCString curdir dname + path <- liftIO $ Path.appendCString curdir dname let dirs1 = path : dirs r <- copyToBuf mbarr pos curdir dname case r of From 4d2130e6aff5e5f2b546051b3fc2315f332f2ba7 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 12:16:19 +0530 Subject: [PATCH 12/17] Add a clarification about not having appendCWString --- core/src/Streamly/Internal/FileSystem/PosixPath.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index d72dbdd778..3b27ea1b6e 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -982,8 +982,14 @@ joinDir Common.OS_NAME (Common.toString Unicode.UNICODE_DECODER) a b #endif +-- NOTE: Even though appendCString is portable and can be exposed for both +-- Windows and Posix, appendCWString is different (16-bit vs 32-bit) on Windows +-- and Posix. Since WindowsPath module is designed to work on both Windows and +-- Posix, we cannot expose appendCWString as it would use 32-bit string on +-- Posix even in the Windows module. We will need a fixed appendW16CString for +-- that. + -- XXX This can be pure, like append. --- XXX add appendCWString for Windows? #ifndef IS_WINDOWS -- | Append a separator and a CString to the Array. This is like 'unsafeJoin' From 5ee36cfba2041856c65f03c4e3cfefdbc221f4d4 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 13:45:55 +0530 Subject: [PATCH 13/17] Add asW16CString Path operation for Windows --- core/src/Streamly/Internal/FileSystem/PosixPath.hs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 3b27ea1b6e..7f3905ae3c 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -16,6 +16,7 @@ -- encoding. #if defined(IS_PORTABLE) +-- In the portable module these are defined as type aliases to actual types #define OS_PATH_TYPE Path #define OS_WORD_TYPE OsWord #define OS_CSTRING_TYPE OsCString @@ -23,8 +24,8 @@ #elif defined(IS_WINDOWS) #define OS_PATH_TYPE WindowsPath #define OS_WORD_TYPE Word16 -#define OS_CSTRING_TYPE CWString -#define AS_OS_CSTRING asCWString +#define OS_CSTRING_TYPE Ptr Word16 +#define AS_OS_CSTRING asW16CString #else #define OS_PATH_TYPE PosixPath #define OS_WORD_TYPE Word8 @@ -37,7 +38,7 @@ #define OS_NAME Windows #define OS_PATH WindowsPath #define OS_WORD Word16 -#define OS_CSTRING CWString +#define OS_CSTRING Ptr Word16 #define UNICODE_ENCODER encodeUtf16le' #define UNICODE_DECODER decodeUtf16le' #define UNICODE_DECODER_LAX decodeUtf16le @@ -222,10 +223,11 @@ import Data.Functor.Identity (Identity(..)) import Data.Maybe (fromJust, isJust) import Data.Word (Word8) #ifndef IS_WINDOWS +import Data.Coerce (coerce) import Foreign.C (CString) #else import Data.Word (Word16) -import Foreign.C (CWString) +import Foreign (Ptr) #endif import Language.Haskell.TH.Syntax (lift) import Streamly.Internal.Data.Array (Array(..)) @@ -869,13 +871,13 @@ instance Show OS_PATH where -- system calls on Posix. {-# INLINE AS_OS_CSTRING #-} AS_OS_CSTRING :: OS_PATH_TYPE -> (OS_CSTRING_TYPE -> IO a) -> IO a -AS_OS_CSTRING p = Array.asCStringUnsafe (toArray p) +AS_OS_CSTRING p = Array.asCString (coerce (toArray p)) #else -- | Use the path as a pinned CWString. Useful for using a WindowsPath in -- system calls on Windows. {-# INLINE AS_OS_CSTRING #-} AS_OS_CSTRING :: OS_PATH_TYPE -> (OS_CSTRING_TYPE -> IO a) -> IO a -AS_OS_CSTRING p = Array.asCWString (toArray p) +AS_OS_CSTRING p = Array.asW16CString (toArray p) #endif ------------------------------------------------------------------------------ From cf797bcd475e0ea4f975b6474557b05b346926ad Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 15:46:53 +0530 Subject: [PATCH 14/17] Use asNullTerminatedPtr in PosixPath --- core/src/Streamly/Internal/FileSystem/PosixPath.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/PosixPath.hs b/core/src/Streamly/Internal/FileSystem/PosixPath.hs index 7f3905ae3c..4682e7aedd 100644 --- a/core/src/Streamly/Internal/FileSystem/PosixPath.hs +++ b/core/src/Streamly/Internal/FileSystem/PosixPath.hs @@ -223,8 +223,8 @@ import Data.Functor.Identity (Identity(..)) import Data.Maybe (fromJust, isJust) import Data.Word (Word8) #ifndef IS_WINDOWS -import Data.Coerce (coerce) import Foreign.C (CString) +import Foreign (castPtr) #else import Data.Word (Word16) import Foreign (Ptr) @@ -871,13 +871,14 @@ instance Show OS_PATH where -- system calls on Posix. {-# INLINE AS_OS_CSTRING #-} AS_OS_CSTRING :: OS_PATH_TYPE -> (OS_CSTRING_TYPE -> IO a) -> IO a -AS_OS_CSTRING p = Array.asCString (coerce (toArray p)) +AS_OS_CSTRING p act = + Array.asNullTerminatedPtr (toArray p) $ \ptr -> act (castPtr ptr) #else -- | Use the path as a pinned CWString. Useful for using a WindowsPath in -- system calls on Windows. {-# INLINE AS_OS_CSTRING #-} AS_OS_CSTRING :: OS_PATH_TYPE -> (OS_CSTRING_TYPE -> IO a) -> IO a -AS_OS_CSTRING p = Array.asW16CString (toArray p) +AS_OS_CSTRING p = Array.asNullTerminatedPtr (toArray p) #endif ------------------------------------------------------------------------------ From f91b2b0ad1a2c8ab02157272c2fc9cb8598b9926 Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 16:39:44 +0530 Subject: [PATCH 15/17] Replace Array API calls with Path API in Posix Syscalls --- core/src/Streamly/Internal/FileSystem/PathIO.hs | 3 +-- core/src/Streamly/Internal/Syscall/Posix.hsc | 9 +++++---- core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/Streamly/Internal/FileSystem/PathIO.hs b/core/src/Streamly/Internal/FileSystem/PathIO.hs index 8ccd92137a..5652ff406e 100644 --- a/core/src/Streamly/Internal/FileSystem/PathIO.hs +++ b/core/src/Streamly/Internal/FileSystem/PathIO.hs @@ -52,8 +52,7 @@ getCurrentDirectory = modifyError $ do -- | Set the current working directory. setCurrentDirectory :: Path -> IO () -setCurrentDirectory p = modifyError $ - Syscall.setCwd (toArray p) +setCurrentDirectory p = modifyError $ Syscall.setCwd p where diff --git a/core/src/Streamly/Internal/Syscall/Posix.hsc b/core/src/Streamly/Internal/Syscall/Posix.hsc index 6e77c3e6d8..973430ae16 100644 --- a/core/src/Streamly/Internal/Syscall/Posix.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix.hsc @@ -35,11 +35,12 @@ import Foreign.Storable (peekByteOff) import GHC.Base (Addr##) import GHC.Ptr (Ptr(..)) import Streamly.Internal.Data.Array.Type (Array(..)) -import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.MutByteArray as MutByteArray import Streamly.Internal.Data.MutByteArray.Type (MutByteArray, PinnedState(..), unsafeAsPtr) import Streamly.Internal.Syscall.Common (retry) +import Streamly.Internal.FileSystem.PosixPath (PosixPath) +import qualified Streamly.Internal.FileSystem.PosixPath as Path import System.Posix.Types (CMode) #include @@ -81,9 +82,9 @@ getCwd = do foreign import ccall unsafe "chdir" c_chdir :: CString -> IO CInt -setCwd :: Array Word8 -> IO () -setCwd arr = - Array.asCStringUnsafe arr $ +setCwd :: PosixPath -> IO () +setCwd path = + Path.asCString path $ throwErrnoIfMinus1_ "setCwd" . c_chdir -------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc index 5db85e21fe..87aaaac69d 100644 --- a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc @@ -155,7 +155,7 @@ openDirStreamCString s = do -- {-# INLINE openDirStream #-} openDirStream :: PosixPath -> IO DirStream openDirStream p = - Array.asCStringUnsafe (Path.toArray p) $ \s -> do + Path.asCString p $ \s -> do -- openDirStreamCString s dirp <- throwErrnoPathIfNullRetry "openDirStream" p $ c_opendir s return (DirStream dirp) @@ -214,7 +214,7 @@ statEntryType conf parent dname = do -- XXX We can create a pinned array right here since the next call pins -- it anyway. path <- Path.appendCString parent dname - Array.asCStringUnsafe (Path.toArray path) $ \cStr -> do + Path.asCString path $ \cStr -> do res <- stat (_followSymlinks conf) cStr case res of Right mode -> pure $ From 3ad27e84a4b5f8364d8a0ae04c98b75ef4ba15eb Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 7 May 2026 07:25:41 +0530 Subject: [PATCH 16/17] Fix Windows Syscall module for Path/Array API changes Cleanup Windows FFI imports Replace unsafeJoin with appendW16CString in Win Readdir --- .../Internal/FileSystem/Path/Common.hs | 18 +---- core/src/Streamly/Internal/Syscall/Windows.hs | 20 ++++-- .../Internal/Syscall/Windows/Common.hs | 19 +++++ .../Internal/Syscall/Windows/File.hsc | 4 +- .../Internal/Syscall/Windows/ReadDir.hsc | 70 ++++++++++++------- core/streamly-core.cabal | 1 + 6 files changed, 82 insertions(+), 50 deletions(-) create mode 100644 core/src/Streamly/Internal/Syscall/Windows/Common.hs diff --git a/core/src/Streamly/Internal/FileSystem/Path/Common.hs b/core/src/Streamly/Internal/FileSystem/Path/Common.hs index c47f14d034..d58bf6c8e5 100644 --- a/core/src/Streamly/Internal/FileSystem/Path/Common.hs +++ b/core/src/Streamly/Internal/FileSystem/Path/Common.hs @@ -59,8 +59,7 @@ module Streamly.Internal.FileSystem.Path.Common , unsafeAppend , appendCString , appendCString' - , appendCWString - , appendCWString' + , appendCStringWith , unsafeJoinPaths -- , joinRoot -- XXX append should be enough, see joinRootBody @@ -128,7 +127,7 @@ import Data.Function ((&)) import Data.Functor.Identity (Identity(..)) import Data.Word (Word8, Word16) import Foreign (castPtr) -import Foreign.C (CString, CSize(..), CWchar, CWString) +import Foreign.C (CString, CSize(..)) import GHC.Base (unsafeChr, Addr#) import GHC.Ptr (Ptr(..)) import Language.Haskell.TH (Q, Exp) @@ -1541,19 +1540,6 @@ appendCString' :: OS -> Array Word8 -> CString -> IO (Array Word8) appendCString' os arr cstr = appendCStringWith MutArray.emptyOf' c_strlen_pinned os arr (castPtr cstr) -foreign import ccall unsafe "wchar.h wcslen" c_wcslen_pinned - :: Addr# -> IO CSize - --- | NOTE: CWchar is 16-bit wide on Windows and 32-bit wide on Posix. wcslen is --- available on both Posix and Windows and counts accordingly in units of --- 2-bytes or 4-bytes. -{-# INLINE appendCWString #-} -appendCWString :: OS -> Array CWchar -> CWString -> IO (Array CWchar) -appendCWString = appendCStringWith MutArray.emptyOf c_wcslen_pinned - -{-# INLINE appendCWString' #-} -appendCWString' :: OS -> Array CWchar -> CWString -> IO (Array CWchar) -appendCWString' = appendCStringWith MutArray.emptyOf' c_wcslen_pinned {-# INLINE doAppend #-} doAppend :: (Unbox a, Integral a) => OS -> Array a -> Array a -> Array a diff --git a/core/src/Streamly/Internal/Syscall/Windows.hs b/core/src/Streamly/Internal/Syscall/Windows.hs index 03eead05f3..69db078c0e 100644 --- a/core/src/Streamly/Internal/Syscall/Windows.hs +++ b/core/src/Streamly/Internal/Syscall/Windows.hs @@ -27,6 +27,8 @@ module Streamly.Internal.Syscall.Windows import Control.Monad (when) import Data.Word (Word16) +import Foreign (Ptr) +import Foreign.C (CWchar(..), CWString(..)) import System.Win32.Types (BOOL, DWORD, LPTSTR, UINT, failIfFalse_) import qualified System.Win32 as Win32 (failWith) import Streamly.Internal.Data.Array.Type (Array(..)) @@ -34,10 +36,15 @@ import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.MutByteArray as MutByteArray import Streamly.Internal.Data.MutByteArray.Type (MutByteArray, PinnedState(..), unsafeAsPtr) +import Streamly.Internal.FileSystem.WindowsPath (WindowsPath) +import qualified Streamly.Internal.FileSystem.WindowsPath as Path import Streamly.Internal.Syscall.Common (retry) -foreign import WINDOWS_CCONV unsafe "GetCurrentDirectoryW" - c_getCurrentDirectory :: DWORD -> LPTSTR -> IO UINT +-- Non-explicit import +import Streamly.Internal.Syscall.Windows.Common + +foreign import WINDOWS_CCONV unsafe "windows.h GetCurrentDirectoryW" + c_getCurrentDirectory :: DWORD -> LPTSTR -> IO DWORD foreign import ccall unsafe "windows.h GetLastError" c_GetLastError :: IO DWORD @@ -66,10 +73,9 @@ getCwd = do mba <- MutByteArray.rightSizeAs Unpinned (len * 2) arr return (Array mba 0 (fromIntegral (len * 2))) -foreign import WINDOWS_CCONV unsafe "SetCurrentDirectoryW" - c_setCurrentDirectory :: LPTSTR -> IO BOOL +foreign import WINDOWS_CCONV unsafe "windows.h SetCurrentDirectoryW" + c_SetCurrentDirectoryW :: LPTSTR -> IO BOOL -setCwd :: Array Word16 -> IO () -setCwd arr = - Array.asCWString arr (failIfFalse_ "setCwd" . c_setCurrentDirectory) +setCwd :: WindowsPath -> IO () +setCwd arr = asCWString arr (failIfFalse_ "setCwd" . c_SetCurrentDirectoryW) #endif diff --git a/core/src/Streamly/Internal/Syscall/Windows/Common.hs b/core/src/Streamly/Internal/Syscall/Windows/Common.hs new file mode 100644 index 0000000000..d5ea57318e --- /dev/null +++ b/core/src/Streamly/Internal/Syscall/Windows/Common.hs @@ -0,0 +1,19 @@ +-- | +-- Module : Streamly.Internal.Syscall.Windows.Common +-- Copyright : (c) 2026 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Portability : GHC + +module Streamly.Internal.Syscall.Windows.Common + ( asCWString + ) +where + +import Foreign.C (CWString) +import Foreign.Ptr (castPtr) +import Streamly.Internal.FileSystem.WindowsPath (WindowsPath) +import qualified Streamly.Internal.FileSystem.WindowsPath as Path + +asCWString :: WindowsPath -> (CWString -> IO a) -> IO a +asCWString p act = Path.asW16CString p $ \ptr -> act (castPtr ptr) diff --git a/core/src/Streamly/Internal/Syscall/Windows/File.hsc b/core/src/Streamly/Internal/Syscall/Windows/File.hsc index 2a57887ca5..206c311040 100644 --- a/core/src/Streamly/Internal/Syscall/Windows/File.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/File.hsc @@ -33,8 +33,10 @@ import GHC.IO.Handle.FD (fdToHandle') import qualified Streamly.Internal.FileSystem.File.Common as File import qualified Streamly.Internal.FileSystem.WindowsPath as Path +-- Non-explicit imports import Data.Bits import Foreign.Ptr +import Streamly.Internal.Syscall.Windows.Common import System.Win32 as Win32 hiding (createFile, failIfWithRetry) #include @@ -87,7 +89,7 @@ createFile :: -> Maybe Win32.HANDLE -> IO Win32.HANDLE createFile name access share mb_attr mode flag mb_h = - Path.asCWString name $ \c_name -> + asCWString name $ \c_name -> failIfWithRetry (== iNVALID_HANDLE_VALUE) (unwords ["CreateFile", Path.toString name]) diff --git a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc index cde8f0a0ef..1e7123c921 100644 --- a/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Windows/ReadDir.hsc @@ -6,6 +6,8 @@ -- Maintainer : streamly@composewell.com -- Portability : GHC +{-# LANGUAGE UnliftedFFITypes #-} + module Streamly.Internal.Syscall.Windows.ReadDir ( #if defined(mingw32_HOST_OS) || defined(__MINGW32__) @@ -29,6 +31,7 @@ import Control.Monad.Catch (MonadCatch) import Control.Monad.IO.Class (MonadIO(..)) import Data.Char (ord, isSpace) import Data.IORef (IORef, newIORef, readIORef, writeIORef) +import GHC.Base (Addr##) import Foreign.C ( CInt(..), CSize(..), CWchar(..), Errno(..) , errnoToIOError, peekCWString @@ -44,12 +47,15 @@ import Streamly.Internal.FileSystem.WindowsPath (WindowsPath(..)) import System.IO.Error (ioeSetErrorString) import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.MutArray as MutArray import qualified Streamly.Internal.Data.MutByteArray as MutByteArray import qualified Streamly.Internal.Data.Unfold as UF (bracketIO) +import qualified Streamly.Internal.FileSystem.Path.Common as PathC import qualified Streamly.Internal.FileSystem.WindowsPath as Path import qualified System.Win32 as Win32 (failWith) import Streamly.Internal.FileSystem.DirOptions +import Streamly.Internal.Syscall.Windows.Common (asCWString) import Foreign hiding (void) #include @@ -95,9 +101,12 @@ foreign import ccall unsafe "windows.h LocalFree" localFree :: Ptr a -> IO (Ptr a) ------------------------------------------------------------------------------ --- Haskell C APIs +-- FFI imports/Haskell C APIs ------------------------------------------------------------------------------ +foreign import ccall unsafe "string.h memcpy" c_memcpy + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) + foreign import ccall unsafe "maperrno_func" -- in base/cbits/Win32Utils.c c_maperrno_func :: ErrCode -> IO Errno @@ -144,6 +153,32 @@ failIf p wh act = do iNVALID_HANDLE_VALUE :: HANDLE iNVALID_HANDLE_VALUE = castUINTPtrToPtr maxBound +------------------------------------------------------------------------------ +-- Path string manipulation +------------------------------------------------------------------------------ + +foreign import ccall unsafe "wchar.h wcslen" c_wcslen + :: Ptr CWchar -> IO CSize + +foreign import ccall unsafe "wchar.h wcslen" c_wcslen_pinned + :: Addr## -> IO CSize + +-- This is defined here and not in Path module because wcslen is a platform +-- specific function and uses 32-bit wide chars on posix and 16-bit wide chars +-- on Windows. We cannot have it in WindowsPath module because that module is +-- plaform agnostic and works on Posix as well. +-- +{-# INLINE appendW16CString #-} +appendW16CString :: WindowsPath -> Ptr CWchar -> IO WindowsPath +appendW16CString (WindowsPath arr) str = + fmap WindowsPath + $ PathC.appendCStringWith + MutArray.emptyOf + c_wcslen_pinned + PathC.Windows + arr + (castPtr str) + ------------------------------------------------------------------------------ -- Dir stream implementation ------------------------------------------------------------------------------ @@ -158,14 +193,13 @@ openDirStream p = do fp_finddata <- mallocForeignPtrBytes (# const sizeof(WIN32_FIND_DATAW) ) withForeignPtr fp_finddata $ \dataPtr -> do handle <- - -- XXX should it be asCWString, so we do not need to use castPtr - Array.asCStringUnsafe (Path.toArray path) $ \pathPtr -> do + asCWString path $ \pathPtr -> do -- XXX Use getLastError to distinguish the case when no -- matching file is found. See the doc of FindFirstFileW. failIf (== iNVALID_HANDLE_VALUE) ("FindFirstFileW: " ++ Path.toString path) - $ c_FindFirstFileW (castPtr pathPtr) dataPtr + $ c_FindFirstFileW pathPtr dataPtr ref <- newIORef True return $ DirStream (handle, ref, fp_finddata) @@ -358,11 +392,8 @@ readEitherChunks _confMod alldirs = if isMeta then return $ Skip st else do - arr <- liftIO $ Array.fromW16CString dname - let path = - Path.unsafeJoin curdir - (Path.unsafeFromArray arr) - dirs1 = path : dirs + path <- liftIO $ appendW16CString curdir dname + let dirs1 = path : dirs ndirs1 = ndirs + 1 if ndirs1 >= dirMax then return $ Yield (Left dirs1) @@ -371,11 +402,8 @@ readEitherChunks _confMod alldirs = (ChunkStreamLoop curdir xs ds dirs1 ndirs1 files nfiles) else do - arr <- liftIO $ Array.fromW16CString dname - let path = - Path.unsafeJoin curdir - (Path.unsafeFromArray arr) - files1 = path : files + path <- liftIO $ appendW16CString curdir dname + let files1 = path : files nfiles1 = nfiles + 1 if nfiles1 >= fileMax then return $ Yield (Right files1) @@ -392,12 +420,6 @@ readEitherChunks _confMod alldirs = -- Chunked byte-buffered reads ------------------------------------------------------------------------------ -foreign import ccall unsafe "string.h memcpy" c_memcpy - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) - -foreign import ccall unsafe "wchar.h wcslen" c_wcslen - :: Ptr CWchar -> IO CSize - -- Split a list in half. splitHalf :: [a] -> ([a], [a]) splitHalf xxs = split xxs xxs @@ -430,8 +452,6 @@ data ChunkStreamByteState = MutByteArray Int --- TODO: instead of unsafeJoin use appendCWString --- -- NOTE: Unlike posix on Windows the file attribute to determine whether it is -- a directory or not is always available so we do not need the code to handle -- the case when they are not available, on Posix we need to use stat @@ -597,10 +617,8 @@ readEitherByteChunks _confMod alldirs = {-# INLINE handleDirEnt #-} handleDirEnt pos dname = do - arr <- liftIO $ Array.fromW16CString dname - let path = - Path.unsafeJoin curdir (Path.unsafeFromArray arr) - dirs1 = path : dirs + path <- liftIO $ appendW16CString curdir dname + let dirs1 = path : dirs r <- copyToBuf mbarr pos curdir dname case r of Just pos1 -> goInner dirs1 pos1 diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index 95cf544770..071b00ab7e 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -419,6 +419,7 @@ library , Streamly.Internal.Syscall.Common , Streamly.Internal.Syscall.Posix , Streamly.Internal.Syscall.Windows + , Streamly.Internal.Syscall.Windows.Common , Streamly.Internal.Syscall.Windows.ReadDir , Streamly.Internal.Syscall.Windows.File , Streamly.Internal.FileSystem.FileIO From 01064f39e2affbcf37a0de124e2815dd548a1c1a Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Fri, 8 May 2026 07:02:10 +0530 Subject: [PATCH 17/17] Update TODO comment in Posix readdir module --- .../Internal/Syscall/Posix/ReadDir.hsc | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc index 87aaaac69d..7c3b7c3658 100644 --- a/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc +++ b/core/src/Streamly/Internal/Syscall/Posix/ReadDir.hsc @@ -383,10 +383,27 @@ data ChunkStreamState = [PosixPath] -- files buffered Int -- file count --- XXX We can use a fold for collecting files and dirs. --- A fold may be useful to translate the output to whatever format we want, we --- can add a prefix or we can colorize it. The Right output would be the output --- of the fold which can be any type not just a Path. +-- XXX By using a fold we can unify readEitherChunks and readEitherByteChunks. +-- For chunks we need to pass toList as the fold, and for ByteChunks we can +-- pass a Fold m Path (Array Word). The Right output would be the output of the +-- fold which can be any type not just a Path. The fold itself can perform +-- transformations on the path e.g. colorization, filtering. In general we can +-- use a (Path -> Maybe Path) function to do the transformations, for Path +-- stream we can map it over the stream and for Folds we can use it to build +-- the fold. +-- +-- When using a fold compared to directly writing to an Array we have to make +-- one more copy of each Path element to copy it from the readdir buffer +-- (because it will go away) to a Path data type. However, the Path will be +-- quickly written down to the Array while it is still hot in L1 cache, so +-- there should not be much noticeable difference in perf. +-- +-- Things that need to be controlled at the level of readdir and not after +-- should be passed as readdir options e.g. maxDepth could be one such option +-- because readdir has the visibility over it post-filtering would be +-- inefficient. If we have to control it outside readdir we can efficiently +-- control it by controlling the depth of the directories being fed back to +-- concatIterate. -- XXX We can write a two fold scan to buffer and yield whichever fills first -- like foldMany, it would be foldEither. @@ -469,6 +486,7 @@ foreign import ccall unsafe "string.h memcpy" c_memcpy foreign import ccall unsafe "string.h strlen" c_strlen :: Ptr CChar -> IO CSize +-- XXX Move this to common -- Split a list in half. splitHalf :: [a] -> ([a], [a]) splitHalf xxs = split xxs xxs