Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 100 additions & 51 deletions core/src/Streamly/Internal/FileSystem/Path/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,75 @@ parseSegment arr sepOff = (segOff, segCnt)
-- should not add another separator between root and path - thus joining root
-- and path in this case is anyway special.

-- | Split a path prefixed with "\\" into (drive, path) tuple.
-- | Parse a UNC-prefixed path. Caller must ensure 'isAbsoluteUNC' holds.
-- Returns @(splitOffset, isStructuralUNC, wellFormed)@:
--
-- * @splitOffset@: first byte after the share name (or after the segment
-- following @\\\\?\\@ for raw verbatim paths). This is the split point
-- used by 'unsafeSplitUNC'.
--
-- * @isStructuralUNC@: 'True' for paths whose root has the
-- @\\\\server\\share@ shape — either non-verbatim
-- (@\\\\server\\share[\\...]@) or verbatim
-- (@\\\\?\\UNC\\server\\share[\\...]@). 'False' for raw verbatim paths
-- that do not name a network share (e.g. @\\\\?\\C:\\...@, @\\\\.\\x@,
-- @\\\\??\\x@).
--
-- * @wellFormed@: for structural UNC paths, 'True' iff the server and
-- share names are both non-empty and separated by exactly one
-- separator. For raw verbatim paths the share rule does not apply and
-- this is always 'True'.
unsafeParseUNCEnd :: (Unbox a, Integral a) => Array a -> (Int, Bool, Bool)
unsafeParseUNCEnd arr =
if cnt1 == 1 && unsafeIndexChar 2 arr == '?'
then
-- XXX Should mixed-case markers like "UnC" be accepted as the UNC
-- variant too? Currently only exact uppercase "UNC" is matched, so
-- "\\?\UnC\srv\share\x" falls through to the raw-verbatim branch
-- (where the server+share rule does not apply). The alternative
-- is to either match case-insensitively here, or reject mixed-case
-- markers outright in validatePath. Write test cases for whichever
-- choice we settle on.
if uncLen == 3
&& unsafeIndexChar uncOff arr == 'U'
&& unsafeIndexChar (uncOff + 1) arr == 'N'
&& unsafeIndexChar (uncOff + 2) arr == 'C'
then
-- \\?\UNC\server\share\...
let sepStart = serverOff + serverLen
(shareOff, shareLen) = parseSegment arr sepStart
in ( shareOff + shareLen
, True
, serverLen > 0
&& shareLen > 0
&& shareOff - sepStart == 1
)
else
-- \\?\<anything else>... (raw verbatim, e.g. \\?\C:\...)
(sepOff1, False, True)
else
-- \\server\share\...
let (shareOff, shareLen) = parseSegment arr sepOff
in ( shareOff + shareLen
, True
, cnt1 > 0
&& shareLen > 0
&& shareOff - sepOff == 1
)

where

arr1 = snd $ Array.unsafeBreakAt 2 arr
cnt1 = countLeadingBy (not . isSeparatorWord Windows) arr1
sepOff = 2 + cnt1

-- XXX it should either be UNC or two letter drive in a valid path
(uncOff, uncLen) = parseSegment arr sepOff
sepOff1 = uncOff + uncLen
(serverOff, serverLen) = parseSegment arr sepOff1

-- | Split a path prefixed with "\\" into (drive, path) tuple. The share
-- name (where present) is treated as part of the root.
--
-- >>> toList (a,b) = (unpackPosix a, unpackPosix b)
-- >>> split = toList . Common.unsafeSplitUNC . packPosix
Expand All @@ -789,8 +857,11 @@ parseSegment arr sepOff = (segOff, segCnt)
-- >>> split "\\\\server\\"
-- ("\\\\server\\","")
--
-- >>> split "\\\\server\\home"
-- ("\\\\server\\","home")
-- >>> split "\\\\server\\share"
-- ("\\\\server\\share","")
--
-- >>> split "\\\\server\\share\\home"
-- ("\\\\server\\share\\","home")
--
-- >>> split "\\\\?\\c:"
-- ("\\\\?\\c:","")
Expand All @@ -807,32 +878,16 @@ parseSegment arr sepOff = (segOff, segCnt)
-- >>> split "\\\\?\\UNC\\server"
-- ("\\\\?\\UNC\\server","")
--
-- >>> split "\\\\?\\UNC/server\\home"
-- ("\\\\?\\UNC/server\\","home")
-- >>> split "\\\\?\\UNC/server\\share"
-- ("\\\\?\\UNC/server\\share","")
--
-- >>> split "\\\\?\\UNC/server\\share\\home"
-- ("\\\\?\\UNC/server\\share\\","home")
--
unsafeSplitUNC :: (Unbox a, Integral a) => Array a -> (Array a, Array a)
unsafeSplitUNC arr =
if cnt1 == 1 && unsafeIndexChar 2 arr == '?'
then do
if uncLen == 3
&& unsafeIndexChar uncOff arr == 'U'
&& unsafeIndexChar (uncOff + 1) arr == 'N'
&& unsafeIndexChar (uncOff + 2) arr == 'C'
then unsafeSplitPrefix Windows (serverOff + serverLen) arr
else unsafeSplitPrefix Windows sepOff1 arr
else unsafeSplitPrefix Windows sepOff arr

where

arr1 = snd $ Array.unsafeBreakAt 2 arr
cnt1 = countLeadingBy (not . isSeparatorWord Windows) arr1
sepOff = 2 + cnt1

-- XXX there should be only one separator in a valid path?
-- XXX it should either be UNC or two letter drive in a valid path
(uncOff, uncLen) = parseSegment arr sepOff
sepOff1 = uncOff + uncLen
(serverOff, serverLen) = parseSegment arr sepOff1
let (off, _, _) = unsafeParseUNCEnd arr
in unsafeSplitPrefix Windows off arr

-- XXX should we make the root Maybe? Both components will have to be Maybe to
-- avoid an empty path.
Expand Down Expand Up @@ -1333,7 +1388,7 @@ validatePathWith _ Posix path =
then throwM $ InvalidPath
$ "Null char found after " ++ show validLen ++ " characters."
else pure ()
validatePathWith allowRoot Windows path
validatePathWith _allowRoot Windows path
| Array.null path = throwM $ InvalidPath "Empty path"
| otherwise = do
if hasDrive path && postDriveSep > 1 -- "C://"
Expand All @@ -1348,13 +1403,17 @@ validatePathWith allowRoot Windows path
then throwM $ InvalidPath
-- XXX print the invalid component name
"Special filename component found in share root"
else if rootEndSeps /= 1 -- "//share//x"
else if isStructuralUNC && not uncWellFormed -- "//x", "//x/"
then throwM $ InvalidPath
$ "A UNC path must have both a server and a share name "
++ "separated by a single separator"
else if isStructuralUNC && rootEndSeps > 1 -- "//x/y//z"
then throwM $ InvalidPath
$ "Share name is needed and exactly one separator is needed "
++ "after the share root"
else if not allowRoot && Array.null stem -- "//share/"
"At most one separator is allowed after the share name"
else if not isStructuralUNC && rootEndSeps /= 1 -- "\\\\?\\c:x"
then throwM $ InvalidPath
"the share root must be followed by a non-empty path"
$ "Exactly one separator is needed after the verbatim "
++ "prefix segment"
else pure ()
else pure ()

Expand Down Expand Up @@ -1384,6 +1443,10 @@ validatePathWith allowRoot Windows path

rootEndSeps = countTrailingBy (isSeparatorWord Windows) root

-- Defined only when isAbsoluteUNC path holds; the only use sites below
-- are guarded by 'isAbsoluteUNC path' so it is fine to evaluate lazily.
(_, isStructuralUNC, uncWellFormed) = unsafeParseUNCEnd path

-- TBD: We are not currently validating the sharenames against disallowed
-- file names. Apparently windows does not allow even sharenames with those
-- names. To match against sharenames we will have to strip the separators
Expand Down Expand Up @@ -1869,22 +1932,14 @@ isVerbatimRoot a =
-- not be a plain root. If not then this function will not work correctly e.g.
-- it might change \/ to // making the path a share name from a normal path.
--
-- The drive letter (and the UNC server name that 'splitRoot' returns as the
-- root) is always compared case-insensitively. Verbatim @\\\\?\\@ paths are
-- compared byte-for-byte with no normalisation.
--
-- Note: 'splitRoot' currently treats the UNC share name as the first body
-- component, not as part of the root. The share name therefore follows the
-- body's 'ignoreCase' rule, not this root-only rule.
-- The drive letter and the full UNC root (server and share name that
-- 'splitRoot' returns) are always compared case-insensitively. Verbatim
-- @\\\\?\\@ paths are compared byte-for-byte with no normalisation.
eqWindowsRootWithDrive :: (Unbox a, Integral a) =>
Array a -> Array a -> Bool
eqWindowsRootWithDrive a b
| isVerbatimRoot a || isVerbatimRoot b = Array.byteEq a b
| otherwise =
-- XXX We probably do not want to translate UnC etc. to UNC.
-- Such a path should either be rejected in splitRoot or we
-- should not translate that here.
-- XXX if so write test cases for that.
let f = normalizeCaseAndSeparators . Array.unsafeCast
in runIdentity $ Stream.eqBy (==) (f a) (f b)

Expand Down Expand Up @@ -2009,10 +2064,8 @@ eqPath decoder os eqCfg@(EqCfg{..}) a b
-- Canonicalisation rules:
--
-- * Drive root: the drive letter is upper-cased (e.g. @c:\\@ -> @C:\\@).
-- * UNC root: the root (currently just the server name) is lower-cased
-- (e.g. @\\\\Server\\@ -> @\\\\server\\@). The share name is treated as
-- the first body component by 'splitRoot' and is normalised by
-- 'normaliseComponents' instead.
-- * UNC root: the entire root (server and share name) is lower-cased
-- (e.g. @\\\\Server\\Share\\@ -> @\\\\server\\share\\@).
-- * Verbatim @\\\\?\\@ device paths are left untouched.
--
-- Separators in non-verbatim roots are normalised to the primary separator.
Expand All @@ -2021,10 +2074,6 @@ normaliseWindowsDriveRoot :: (Unbox a, Integral a) =>
normaliseWindowsDriveRoot a
| isVerbatimRoot a = a
| isAbsoluteUNC a =
-- XXX We probably do not want to translate UnC etc. to UNC.
-- Such a path should either be rejected in splitRoot or we
-- should not translate that here.
-- XXX if so write test cases for that.
Array.fromPureStream
$ fmap charToWord
$ normalizeLowerCaseAndSeparators
Expand Down
31 changes: 18 additions & 13 deletions core/src/Streamly/Internal/FileSystem/PosixPath.hs
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ addTrailingSeparator p@(OS_PATH _arr) =
-- False
--
-- Mixing path separators:
--
-- >>> isValid "/x\\y"
-- True
-- >>> isValid "\\/" -- ?
Expand All @@ -558,7 +559,7 @@ addTrailingSeparator p@(OS_PATH _arr) =
-- >>> isValid "\\\\x"
-- False
-- >>> isValid "\\\\x\\"
-- True
-- False
-- >>> isValid "\\\\x\\y"
-- True
-- >>> isValid "//x/y"
Expand Down Expand Up @@ -593,7 +594,7 @@ addTrailingSeparator p@(OS_PATH _arr) =
--
-- >>> isValid "\\\\?\\UnC\\x" -- UnC treated as share name
-- True
-- >>> isValid "\\\\?\\UNC\\x" -- XXX fix
-- >>> isValid "\\\\?\\UNC\\x" -- server x but no share
-- False
-- >>> isValid "\\\\?\\UNC\\c:\\x"
-- True
Expand Down Expand Up @@ -1042,6 +1043,10 @@ unsafeJoinPaths = undefined
-- Splitting path
------------------------------------------------------------------------------

-- NOTE: definition of a path root. A root is a valid directory path, its
-- contents can be listed. A root does not have a parent directory.
-- TODO: add a short definition of root in splitRoot.

#ifndef IS_WINDOWS
-- | If a path is rooted then separate the root and the remaining path,
-- otherwise return 'Nothing'. The non-root
Expand Down Expand Up @@ -1531,10 +1536,9 @@ ignoreTrailingSeparators val conf = conf { _ignoreTrailingSeparators = val }
-- | When set to 'False', comparison is case sensitive.
--
-- On Windows this flag controls only the case-sensitivity of non-root path
-- segments. The drive letter (and the UNC server name that 'splitRoot'
-- returns as the root) is /always/ compared case-insensitively. The UNC
-- share name is currently treated as a body segment and so follows this
-- flag. Verbatim @\\\\?\\@ paths are always compared case-sensitively
-- segments. The drive letter and the full UNC root (server and share name
-- that 'splitRoot' returns) are /always/ compared case-insensitively.
-- Verbatim @\\\\?\\@ paths are always compared case-sensitively
-- (byte-for-byte), independent of this flag.
--
-- /Default/: False
Expand Down Expand Up @@ -1740,11 +1744,11 @@ collapseDotDots (OS_PATH p) =
#else
-- | Convert the path to an equivalent but standard format for reliable
-- comparison. Collapses redundant separators and removes @.@ components,
-- normalises the root (drive letter upper-cased, UNC server name
-- lower-cased, forward separators converted to backslash), and optionally
-- folds case per the 'EqCfg' options. Verbatim @\\\\?\\@ paths are left
-- untouched. Does /not/ collapse @..@ segments, as that is unsafe in the
-- presence of symlinks.
-- normalises the root (drive letter upper-cased, UNC root — server and
-- share name — lower-cased, forward separators converted to backslash),
-- and optionally folds case per the 'EqCfg' options. Verbatim @\\\\?\\@
-- paths are left untouched. Does /not/ collapse @..@ segments, as that is
-- unsafe in the presence of symlinks.
--
-- A trailing separator is preserved unless 'ignoreTrailingSeparators' is set
-- in the 'EqCfg'.
Expand All @@ -1765,9 +1769,10 @@ collapseDotDots (OS_PATH p) =
-- >>> f "c:\\x"
-- "C:\\x"
--
-- UNC server name is lower-cased; rest of the path follows ignoreCase:
-- UNC root (server and share name) is lower-cased; rest of the path
-- follows ignoreCase:
--
-- >>> f "\\\\Server\\share\\x"
-- >>> f "\\\\Server\\Share\\x"
-- "\\\\server\\share\\x"
--
-- Verbatim paths are left untouched:
Expand Down
Loading
Loading