From 5e50cfd41e912339960b13574d83c519a6f9e759 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Tue, 26 Dec 2023 23:44:45 +0800 Subject: [PATCH 01/17] Add IO-free variants of some functions Fixes #6 --- System/OsString/Common.hs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index ba9ab19..f585364 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -33,7 +33,13 @@ module System.OsString.MODULE_NAME , unsafeEncodeUtf , encodeWith , encodeFS +#ifdef WINDOWS + , fromString +#endif , fromBytes +#ifndef WINDOWS + , fromBytestring +#endif , pstr , singleton , empty @@ -254,6 +260,18 @@ encodeFS = fmap WindowsString . encodeWithBaseWindows encodeFS = fmap PosixString . encodeWithBasePosix #endif +#ifdef WINDOWS +-- | Like 'encodeFS', but not in IO. +-- +-- 'encodeFS' was designed to have a symmetric type signature +-- on unix and windows, but morally the function has no IO effects on windows, +-- so we provide this variant without breaking existing API. +-- +-- This function does not exist on unix. +fromString :: String -> WindowsString +fromString = unsafePerformIO . fmap WindowsString . encodeWithBaseWindows +#endif + #ifdef WINDOWS_DOC -- | Partial unicode friendly decoding. @@ -346,6 +364,18 @@ fromBytes bs = fromBytes = pure . PosixString . BSP.toShort #endif +#ifndef WINDOWS +-- | Like 'fromBytes', but not in IO. +-- +-- 'fromBytes' was designed to have a symmetric type signature +-- on unix and windows, but morally the function has no IO effects on unix, +-- so we provide this variant without breaking existing API. +-- +-- This function does not exist on windows. +fromBytestring :: ByteString -> PosixString +fromBytestring = PosixString . BSP.toShort +#endif + #ifdef WINDOWS_DOC -- | QuasiQuote a 'WindowsString'. This accepts Unicode characters From 618648708051d5fee43a0cbc704e88823f9037cd Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 9 Jun 2024 18:34:58 +0800 Subject: [PATCH 02/17] Add encodeLE/decodeLE, fixing #19 --- System/OsString.hs | 40 ++++++++++++++++++-- System/OsString/Common.hs | 56 ++++++++++++++++++++++++++-- System/OsString/Encoding.hs | 2 + System/OsString/Encoding/Internal.hs | 24 ++++++++++-- System/OsString/Internal.hs | 44 ++++++++++++++++++++-- changelog.md | 4 ++ os-string.cabal | 2 +- 7 files changed, 158 insertions(+), 14 deletions(-) diff --git a/System/OsString.hs b/System/OsString.hs index c4664af..f4ce501 100644 --- a/System/OsString.hs +++ b/System/OsString.hs @@ -24,6 +24,7 @@ module System.OsString , unsafeEncodeUtf , encodeWith , encodeFS + , encodeLE , osstr , empty , singleton @@ -33,6 +34,7 @@ module System.OsString , decodeUtf , decodeWith , decodeFS + , decodeLE , unpack -- * Word types @@ -136,14 +138,14 @@ import System.OsString.Internal , encodeUtf , unsafeEncodeUtf , encodeWith - , encodeFS + , encodeLE , osstr , pack , empty , singleton , decodeUtf , decodeWith - , decodeFS + , decodeLE , unpack , snoc , cons @@ -206,6 +208,38 @@ import System.OsString.Internal , findIndex , findIndices ) +import qualified System.OsString.Internal as SOI import System.OsString.Internal.Types ( OsString, OsChar, coercionToPlatformTypes ) -import Prelude () +import Prelude (String, IO) + +{-# DEPRECATED encodeFS "Use System.OsPath.encodeFS from filepath" #-} +-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem +-- operations (usually filepaths), which is: +-- +-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, +-- but PEP 383 only works properly on UTF-8 encodings, so good luck) +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +encodeFS :: String -> IO OsString +encodeFS = SOI.encodeFS + +{-# DEPRECATED decodeFS "Use System.OsPath.encodeFS from filepath" #-} +-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem +-- operations (usually filepaths), which is: +-- +-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, +-- but PEP 383 only works properly on UTF-8 encodings, so good luck) +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +decodeFS :: OsString -> IO String +decodeFS = SOI.decodeFS + diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index d225854..1ca38ea 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -33,6 +33,7 @@ module System.OsString.MODULE_NAME , unsafeEncodeUtf , encodeWith , encodeFS + , encodeLE , fromBytes , pstr , singleton @@ -43,6 +44,7 @@ module System.OsString.MODULE_NAME , decodeUtf , decodeWith , decodeFS + , decodeLE , unpack -- * Word construction @@ -242,14 +244,14 @@ encodeWith enc str = unsafePerformIO $ do #ifdef WINDOWS_DOC -- | This mimics the behavior of the base library when doing filesystem --- operations, which does permissive UTF-16 encoding, where coding errors generate +-- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range. -- -- The reason this is in IO is because it unifies with the Posix counterpart, -- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'. #else -- | This mimics the behavior of the base library when doing filesystem --- operations, which uses shady PEP 383 style encoding (based on the current locale, +-- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck). -- -- Looking up the locale requires IO. If you're not worried about calls @@ -258,11 +260,35 @@ encodeWith enc str = unsafePerformIO $ do #endif encodeFS :: String -> IO PLATFORM_STRING #ifdef WINDOWS +{-# DEPRECATED encodeFS "Use System.OsPath.Windows.encodeFS from filepath" #-} encodeFS = fmap WindowsString . encodeWithBaseWindows #else +{-# DEPRECATED encodeFS "Use System.OsPath.Posix.encodeFS from filepath" #-} encodeFS = fmap PosixString . encodeWithBasePosix #endif +#ifdef WINDOWS_DOC +-- | This mimics the behavior of the base library when doing string +-- operations, which does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range. +-- +-- The reason this is in IO is because it unifies with the Posix counterpart, +-- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'. +#else +-- | This mimics the behavior of the base library when doing string +-- operations, which uses 'getLocaleEncoding'. +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +#endif +encodeLE :: String -> IO PLATFORM_STRING +#ifdef WINDOWS +encodeLE = fmap WindowsString . encodeWithBaseWindows +#else +encodeLE = fmap PosixString . encodeWithBasePosix' +#endif + #ifdef WINDOWS_DOC -- | Partial unicode friendly decoding. @@ -317,7 +343,29 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do -- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however. #else -- | This mimics the behavior of the base library when doing filesystem --- operations, which uses shady PEP 383 style encoding (based on the current locale, +-- operations, which uses 'getLocaleEncoding'. +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +#endif +decodeLE :: PLATFORM_STRING -> IO String +#ifdef WINDOWS +decodeLE (WindowsString ba) = decodeWithBaseWindows ba +#else +decodeLE (PosixString ba) = decodeWithBasePosix' ba +#endif + +#ifdef WINDOWS_DOC +-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem +-- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range. +-- +-- The reason this is in IO is because it unifies with the Posix counterpart, +-- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however. +#else +-- | This mimics the behavior of the base library when doing filesystem +-- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck). -- -- Looking up the locale requires IO. If you're not worried about calls @@ -326,8 +374,10 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do #endif decodeFS :: PLATFORM_STRING -> IO String #ifdef WINDOWS +{-# DEPRECATED decodeFS "Use System.OsPath.Windows.decodeFS from filepath" #-} decodeFS (WindowsString ba) = decodeWithBaseWindows ba #else +{-# DEPRECATED decodeFS "Use System.OsPath.Posix.decodeFS from filepath" #-} decodeFS (PosixString ba) = decodeWithBasePosix ba #endif diff --git a/System/OsString/Encoding.hs b/System/OsString/Encoding.hs index 2e6c02e..c17abb3 100644 --- a/System/OsString/Encoding.hs +++ b/System/OsString/Encoding.hs @@ -23,6 +23,8 @@ module System.OsString.Encoding -- * base encoding , encodeWithBasePosix , decodeWithBasePosix + , encodeWithBasePosix' + , decodeWithBasePosix' , encodeWithBaseWindows , decodeWithBaseWindows ) diff --git a/System/OsString/Encoding/Internal.hs b/System/OsString/Encoding/Internal.hs index 08f2af2..3466ac1 100644 --- a/System/OsString/Encoding/Internal.hs +++ b/System/OsString/Encoding/Internal.hs @@ -31,7 +31,7 @@ import Numeric (showHex) import Foreign.C (CStringLen) import Data.Char (chr) import Foreign -import GHC.IO.Encoding (getFileSystemEncoding) +import GHC.IO.Encoding (getFileSystemEncoding, getLocaleEncoding) -- ----------------------------------------------------------------------------- -- UCS-2 LE @@ -270,9 +270,15 @@ peekWindowsString (cp, l) = do withPosixString :: String -> (CStringLen -> IO a) -> IO a withPosixString fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f +withPosixString' :: String -> (CStringLen -> IO a) -> IO a +withPosixString' fp f = getLocaleEncoding >>= \enc -> GHC.withCStringLen enc fp f + peekPosixString :: CStringLen -> IO String peekPosixString fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp +peekPosixString' :: CStringLen -> IO String +peekPosixString' fp = getLocaleEncoding >>= \enc -> GHC.peekCStringLen enc fp + -- | Decode with the given 'TextEncoding'. decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String decodeWithTE enc ba = unsafePerformIO $ do @@ -289,18 +295,30 @@ encodeWithTE enc str = unsafePerformIO $ do -- Encoders / decoders -- --- | This mimics the filepath decoder base uses on unix, +-- | This mimics the filepath decoder base uses on unix (using PEP-383), -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). decodeWithBasePosix :: BS8.ShortByteString -> IO String decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString fp --- | This mimics the filepath dencoder base uses on unix, +-- | This mimics the string decoder base uses on unix, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +decodeWithBasePosix' :: BS8.ShortByteString -> IO String +decodeWithBasePosix' ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString' fp + +-- | This mimics the filepath encoder base uses on unix (using PEP-383), -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). encodeWithBasePosix :: String -> IO BS8.ShortByteString encodeWithBasePosix str = withPosixString str $ \cstr -> BS8.packCStringLen cstr +-- | This mimics the string encoder base uses on unix, +-- with the small distinction that we're not truncating at NUL bytes (because we're not at +-- the outer FFI layer). +encodeWithBasePosix' :: String -> IO BS8.ShortByteString +encodeWithBasePosix' str = withPosixString' str $ \cstr -> BS8.packCStringLen cstr + -- | This mimics the filepath decoder base uses on windows, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index 1753d58..7f3d284 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -26,9 +26,11 @@ import System.OsString.Encoding ( EncodingException(..) ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) +import System.OsString.Encoding ( encodeWithBaseWindows, decodeWithBaseWindows ) import qualified System.OsString.Windows as PF #else import GHC.IO.Encoding.UTF8 ( mkUTF8 ) +import System.OsString.Encoding ( encodeWithBasePosix, decodeWithBasePosix ) import qualified System.OsString.Posix as PF #endif import GHC.Stack (HasCallStack) @@ -71,7 +73,7 @@ encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str #endif -- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: +-- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) @@ -82,7 +84,24 @@ encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). encodeFS :: String -> IO OsString -encodeFS = fmap OsString . PF.encodeFS +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +encodeFS = fmap (OsString . WindowsString) . encodeWithBaseWindows +#else +encodeFS = fmap (OsString . PosixString) . encodeWithBasePosix +#endif + +-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing string +-- operations, which is: +-- +-- 1. on unix this uses 'getLocaleEncoding' +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +encodeLE :: String -> IO OsString +encodeLE = fmap OsString . PF.encodeLE -- | Partial unicode friendly decoding. @@ -110,7 +129,7 @@ decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: +-- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) @@ -121,7 +140,24 @@ decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). decodeFS :: OsString -> IO String -decodeFS (OsString x) = PF.decodeFS x +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +decodeFS (OsString (WindowsString x)) = decodeWithBaseWindows x +#else +decodeFS (OsString (PosixString x)) = decodeWithBasePosix x +#endif + +-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing string operations, +-- which is: +-- +-- 1. on unix this uses 'getLocaleEncoding' +-- 2. on windows does permissive UTF-16 encoding, where coding errors generate +-- Chars in the surrogate range +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure +-- to deeply evaluate the result to catch exceptions). +decodeLE :: OsString -> IO String +decodeLE (OsString x) = PF.decodeLE x -- | Constructs an @OsString@ from a ByteString. diff --git a/changelog.md b/changelog.md index 79e4b4b..dcfe77c 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string) +## 2.0.5 *Jun 2024* + +* Add `decodeLE`/`encodeLE` and deprecate `decodeFS`/`encodeFS` (pointing users to `System.OsPath` instead), fixes [#19](https://github.com/haskell/os-string/issues/19) + ## 2.0.3 *May 2024* * Fix `length` function wrt [#17](https://github.com/haskell/os-string/issues/17) diff --git a/os-string.cabal b/os-string.cabal index 827da78..7698c19 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: os-string -version: 2.0.3 +version: 2.0.5 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause From 0228e2465eb8dfd6affee3bc7f092ac0df201eab Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 30 Jun 2024 01:02:09 +0800 Subject: [PATCH 03/17] Bump to 2.0.6 --- changelog.md | 5 +++++ os-string.cabal | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index f362b14..2259038 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,10 @@ # Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string) +## 2.0.6 *Jun 2024* + +* add `fromString` on windows +* add `fromBytestring` on unix + ## 2.0.5 *Jun 2024* * Add `decodeLE`/`encodeLE` and deprecate `decodeFS`/`encodeFS` (pointing users to `System.OsPath` instead), fixes [#19](https://github.com/haskell/os-string/issues/19) diff --git a/os-string.cabal b/os-string.cabal index 7698c19..ee35404 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: os-string -version: 2.0.5 +version: 2.0.6 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause From 06a9edc5d25191f658407786d9d964e34d09755f Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 30 Jun 2024 01:08:03 +0800 Subject: [PATCH 04/17] Improve 'fromString' haddocks --- System/OsString/Common.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 3bc7e66..9916acc 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -296,12 +296,14 @@ encodeLE = fmap PosixString . encodeWithBasePosix' #endif #ifdef WINDOWS --- | Like 'encodeFS', but not in IO. +-- | Like 'encodeLE but not in IO. -- --- 'encodeFS' was designed to have a symmetric type signature +-- 'encodeLE' was designed to have a symmetric type signature -- on unix and windows, but morally the function has no IO effects on windows, -- so we provide this variant without breaking existing API. -- +-- On windows, 'encodeLE' is equivalent to 'encodeFS'. +-- -- This function does not exist on unix. -- -- @since 2.0.6 From 2cb0a50fa6fe9ed775e59b58f6c304b00ec6cf84 Mon Sep 17 00:00:00 2001 From: Zubin Duggal Date: Fri, 11 Oct 2024 16:34:24 +0530 Subject: [PATCH 05/17] Bump base bound to 4.21 for GHC 9.12 --- os-string.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/os-string.cabal b/os-string.cabal index ee35404..0d8e333 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -63,7 +63,7 @@ library default-language: Haskell2010 build-depends: - , base >=4.12.0.0 && <4.21 + , base >=4.12.0.0 && <4.22 , bytestring >=0.11.3.0 , deepseq , exceptions From 42b0b12305c8627af8d1818e85fba955a1f839d7 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Fri, 15 Nov 2024 12:23:27 +0800 Subject: [PATCH 06/17] Don't catch asynchronous exceptions Fixes #22 --- System/OsString/Common.hs | 10 +++++----- System/OsString/Encoding/Internal.hs | 5 +++-- System/OsString/Internal/Exception.hs | 20 ++++++++++++++++++++ os-string.cabal | 1 + tests/encoding/EncodingSpec.hs | 9 +++++---- 5 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 System/OsString/Internal/Exception.hs diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 9916acc..c8a2780 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -143,7 +143,7 @@ module System.OsString.MODULE_NAME where - +import System.OsString.Internal.Exception import System.OsString.Internal.Types ( #ifdef WINDOWS WindowsString(..), WindowsChar(..) @@ -236,7 +236,7 @@ encodeWith :: TextEncoding -- ^ text encoding (wide char) -> String -> Either EncodingException PLATFORM_STRING encodeWith enc str = unsafePerformIO $ do - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> WindowsString <$> BS8.packCStringLen cstr + r <- trySafe @SomeException $ GHC.withCStringLen enc str $ \cstr -> WindowsString <$> BS8.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #else -- | Encode a 'String' with the specified encoding. @@ -244,7 +244,7 @@ encodeWith :: TextEncoding -> String -> Either EncodingException PLATFORM_STRING encodeWith enc str = unsafePerformIO $ do - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BSP.packCStringLen cstr + r <- trySafe @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BSP.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif @@ -340,7 +340,7 @@ decodeWith :: TextEncoding -> PLATFORM_STRING -> Either EncodingException String decodeWith winEnc (WindowsString ba) = unsafePerformIO $ do - r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen winEnc fp + r <- trySafe @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen winEnc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r #else -- | Decode a 'PosixString' with the specified encoding. @@ -350,7 +350,7 @@ decodeWith :: TextEncoding -> PLATFORM_STRING -> Either EncodingException String decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do - r <- try @SomeException $ BSP.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp + r <- trySafe @SomeException $ BSP.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif diff --git a/System/OsString/Encoding/Internal.hs b/System/OsString/Encoding/Internal.hs index 3466ac1..7c3cd88 100644 --- a/System/OsString/Encoding/Internal.hs +++ b/System/OsString/Encoding/Internal.hs @@ -10,6 +10,7 @@ module System.OsString.Encoding.Internal where import qualified System.OsString.Data.ByteString.Short as BS8 import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 +import System.OsString.Internal.Exception import GHC.Base import GHC.Real @@ -282,13 +283,13 @@ peekPosixString' fp = getLocaleEncoding >>= \enc -> GHC.peekCStringLen enc fp -- | Decode with the given 'TextEncoding'. decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String decodeWithTE enc ba = unsafePerformIO $ do - r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp + r <- trySafe @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r -- | Encode with the given 'TextEncoding'. encodeWithTE :: TextEncoding -> String -> Either EncodingException BS8.ShortByteString encodeWithTE enc str = unsafePerformIO $ do - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr + r <- trySafe @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r -- ----------------------------------------------------------------------------- diff --git a/System/OsString/Internal/Exception.hs b/System/OsString/Internal/Exception.hs new file mode 100644 index 0000000..c488af5 --- /dev/null +++ b/System/OsString/Internal/Exception.hs @@ -0,0 +1,20 @@ +module System.OsString.Internal.Exception where + +import Control.Exception ( catch, fromException, toException, throwIO, Exception, SomeAsyncException(..) ) + +-- | Like 'try', but rethrows async exceptions. +trySafe :: Exception e => IO a -> IO (Either e a) +trySafe ioA = catch action eHandler + where + action = do + v <- ioA + return (Right v) + eHandler e + | isAsyncException e = throwIO e + | otherwise = return (Left e) + +isAsyncException :: Exception e => e -> Bool +isAsyncException e = + case fromException (toException e) of + Just (SomeAsyncException _) -> True + Nothing -> False diff --git a/os-string.cabal b/os-string.cabal index 0d8e333..599c395 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -50,6 +50,7 @@ library System.OsString.Encoding.Internal System.OsString System.OsString.Internal + System.OsString.Internal.Exception System.OsString.Internal.Types System.OsString.Posix System.OsString.Windows diff --git a/tests/encoding/EncodingSpec.hs b/tests/encoding/EncodingSpec.hs index bf9e8f0..60420c1 100644 --- a/tests/encoding/EncodingSpec.hs +++ b/tests/encoding/EncodingSpec.hs @@ -14,6 +14,7 @@ import Test.QuickCheck import Data.Either ( isRight ) import qualified System.OsString.Data.ByteString.Short as BS8 import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 +import System.OsString.Internal.Exception import System.OsString.Encoding.Internal import GHC.IO (unsafePerformIO) import GHC.IO.Encoding ( setFileSystemEncoding ) @@ -154,21 +155,21 @@ padEven bs decodeP' :: BS8.ShortByteString -> Either String String decodeP' ba = unsafePerformIO $ do - r <- try @SomeException $ decodeWithBasePosix ba + r <- trySafe @SomeException $ decodeWithBasePosix ba evaluate $ force $ first displayException r encodeP' :: String -> Either String BS8.ShortByteString encodeP' str = unsafePerformIO $ do - r <- try @SomeException $ encodeWithBasePosix str + r <- trySafe @SomeException $ encodeWithBasePosix str evaluate $ force $ first displayException r decodeW' :: BS16.ShortByteString -> Either String String decodeW' ba = unsafePerformIO $ do - r <- try @SomeException $ decodeWithBaseWindows ba + r <- trySafe @SomeException $ decodeWithBaseWindows ba evaluate $ force $ first displayException r encodeW' :: String -> Either String BS8.ShortByteString encodeW' str = unsafePerformIO $ do - r <- try @SomeException $ encodeWithBaseWindows str + r <- trySafe @SomeException $ encodeWithBaseWindows str evaluate $ force $ first displayException r From 4b5efedcd2da9314edda80d973a44e67020370db Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 17 Nov 2024 20:57:26 +0800 Subject: [PATCH 07/17] Bump to 2.0.7 --- changelog.md | 4 ++++ os-string.cabal | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 2259038..55317f4 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string) +## 2.0.7 *Nov 2024* + +* don't catch asynchronous exceptions during encoding/decoding wrt [#22](https://github.com/haskell/os-string/issues/22) + ## 2.0.6 *Jun 2024* * add `fromString` on windows diff --git a/os-string.cabal b/os-string.cabal index 599c395..8672c27 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: os-string -version: 2.0.6 +version: 2.0.7 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause From 5d80fb05959f61d919470cb3a126f4a3072e8b83 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Fri, 27 Dec 2024 13:55:41 +0800 Subject: [PATCH 08/17] Use ghcup-setup in CI --- .github/workflows/test.yaml | 51 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index adfcd67..06b0fd5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -17,10 +17,10 @@ jobs: ghc: ['8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8'] cabal: ['3.8.1.0'] include: - - os: macOS-12 + - os: macOS-13 ghc: '9.4' cabal: '3.8.1.0' - - os: macOS-12 + - os: macOS-13 ghc: '9.6' cabal: '3.8.1.0' - os: windows-latest @@ -36,13 +36,16 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get -y update - sudo apt-get -y install libtinfo5 libtinfo6 libncurses5 libncurses6 + sudo apt-get -y install libtinfo6 libncurses6 + + - name: Install GHCup + uses: haskell/ghcup-setup@v1 - name: Install ghc/cabal run: | set -eux ghcup install ghc --set ${{ matrix.ghc }} - ghcup install cabal ${{ matrix.cabal }} + ghcup install cabal --set ${{ matrix.cabal }} shell: bash - name: Build @@ -59,21 +62,14 @@ jobs: i386: runs-on: ubuntu-latest - container: - image: i386/ubuntu:bionic steps: - - name: Install - run: | - apt-get update -y - apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh - - uses: actions/checkout@v1 - - name: Test - run: | - . ~/.ghcup/env - cabal update - cabal test - cabal bench + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run build (32 bit linux) + uses: docker://hasufell/i386-alpine-haskell:3.12 + with: + args: sh -c "cabal update && cabal test && cabal bench" # We use github.com/haskell self-hosted runners for ARM testing. # If they become unavailable in future, put ['armv7', 'aarch64'] @@ -123,6 +119,9 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Install GHCup + uses: haskell/ghcup-setup@v1 + - name: Run build run: | bash .github/scripts/brew.sh git coreutils llvm@13 autoconf automake @@ -132,8 +131,7 @@ jobs: export LD=ld export AR="$HOME/.brew/opt/llvm@13/bin/llvm-ar" export RANLIB="$HOME/.brew/opt/llvm@13/bin/llvm-ranlib" - . .github/scripts/env.sh - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 BOOTSTRAP_HASKELL_GHC_VERSION=${{ matrix.ghc }} BOOTSTRAP_HASKELL_ADJUST_BASHRC=yes sh + cabal update cabal test cabal bench env: @@ -153,11 +151,16 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Install prerequisites + run: | + sudo pkg install -y curl gcc gmp gmake ncurses perl5 libffi libiconv git bash misc/compat10x misc/compat11x misc/compat12x gmake autoconf + + - name: Install GHCup + uses: haskell/ghcup-setup@v1 + - name: Run build run: | - pkg install -y curl gcc gmp gmake ncurses perl5 libffi libiconv git bash misc/compat10x misc/compat11x misc/compat12x gmake llvm14 - . .github/scripts/env.sh - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 BOOTSTRAP_HASKELL_GHC_VERSION=${{ matrix.ghc }} BOOTSTRAP_HASKELL_ADJUST_BASHRC=yes sh + cabal update cabal test cabal bench @@ -174,7 +177,7 @@ jobs: arch: ['s390x', 'ppc64le'] steps: - uses: actions/checkout@v4 - - uses: uraimo/run-on-arch-action@v2.7.2 + - uses: uraimo/run-on-arch-action@v2.8.1 timeout-minutes: 60 with: arch: ${{ matrix.arch }} From a866ffc944a871366d7347e89b079091cc3334f7 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sat, 4 Jan 2025 17:06:43 +0800 Subject: [PATCH 09/17] Improve CI --- .github/workflows/test.yaml | 87 +++++++++++-------------------------- 1 file changed, 26 insertions(+), 61 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 06b0fd5..470e666 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -15,22 +15,17 @@ jobs: matrix: os: [ubuntu-latest] ghc: ['8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8'] - cabal: ['3.8.1.0'] include: - os: macOS-13 ghc: '9.4' - cabal: '3.8.1.0' - os: macOS-13 ghc: '9.6' - cabal: '3.8.1.0' - os: windows-latest ghc: '9.4' - cabal: '3.8.1.0' - os: windows-latest ghc: '9.6' - cabal: '3.8.1.0' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies (Ubuntu) if: runner.os == 'Linux' @@ -38,15 +33,10 @@ jobs: sudo apt-get -y update sudo apt-get -y install libtinfo6 libncurses6 - - name: Install GHCup - uses: haskell/ghcup-setup@v1 - - - name: Install ghc/cabal - run: | - set -eux - ghcup install ghc --set ${{ matrix.ghc }} - ghcup install cabal --set ${{ matrix.cabal }} - shell: bash + - uses: haskell/ghcup-setup@v1 + with: + ghc: ${{ matrix.ghc }} + cabal: latest - name: Build run: | @@ -54,7 +44,6 @@ jobs: cabal update cabal build --enable-tests --enable-benchmarks cabal test - cabal bench cabal haddock cabal check cabal sdist @@ -69,7 +58,7 @@ jobs: - name: Run build (32 bit linux) uses: docker://hasufell/i386-alpine-haskell:3.12 with: - args: sh -c "cabal update && cabal test && cabal bench" + args: sh -c "cabal update && cabal test" # We use github.com/haskell self-hosted runners for ARM testing. # If they become unavailable in future, put ['armv7', 'aarch64'] @@ -105,6 +94,7 @@ jobs: runs-on: ${{ matrix.os }} env: MACOSX_DEPLOYMENT_TARGET: 10.13 + HOMEBREW_CHANGE_ARCH_TO_ARM: 1 strategy: fail-fast: false matrix: @@ -119,23 +109,25 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Install GHCup - uses: haskell/ghcup-setup@v1 + - name: Install prerequisites + run: | + bash .github/scripts/brew.sh git coreutils llvm@13 autoconf automake + echo PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$HOME/.brew/opt/llvm@13/bin:$PATH" >> "$GITHUB_ENV" + echo CC="$HOME/.brew/opt/llvm@13/bin/clang" >> "$GITHUB_ENV" + echo CXX="$HOME/.brew/opt/llvm@13/bin/clang++" >> "$GITHUB_ENV" + echo LD=ld >> "$GITHUB_ENV" + echo AR="$HOME/.brew/opt/llvm@13/bin/llvm-ar" >> "$GITHUB_ENV" + echo RANLIB="$HOME/.brew/opt/llvm@13/bin/llvm-ranlib" >> "$GITHUB_ENV" + + - uses: haskell/ghcup-setup@v1 + with: + ghc: ${{ matrix.ghc }} + cabal: latest - name: Run build run: | - bash .github/scripts/brew.sh git coreutils llvm@13 autoconf automake - export PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$HOME/.brew/opt/llvm@13/bin:$PATH" - export CC="$HOME/.brew/opt/llvm@13/bin/clang" - export CXX="$HOME/.brew/opt/llvm@13/bin/clang++" - export LD=ld - export AR="$HOME/.brew/opt/llvm@13/bin/llvm-ar" - export RANLIB="$HOME/.brew/opt/llvm@13/bin/llvm-ranlib" cabal update cabal test - cabal bench - env: - HOMEBREW_CHANGE_ARCH_TO_ARM: 1 freebsd: runs-on: ${{ matrix.os }} @@ -153,42 +145,15 @@ jobs: - name: Install prerequisites run: | - sudo pkg install -y curl gcc gmp gmake ncurses perl5 libffi libiconv git bash misc/compat10x misc/compat11x misc/compat12x gmake autoconf + sudo pkg install -y curl gcc gmp gmake ncurses perl5 libffi libiconv git bash misc/compat10x misc/compat11x misc/compat12x gmake llvm14 autoconf - - name: Install GHCup - uses: haskell/ghcup-setup@v1 + - uses: haskell/ghcup-setup@v1 + with: + ghc: ${{ matrix.ghc }} + cabal: latest - name: Run build run: | cabal update cabal test cabal bench - - # Emulation is incredibly slow and memory demanding. It seems that any - # executable with GHC RTS takes at least 7-8 Gb of RAM, so we can run - # `cabal` or `ghc` on their own, but cannot run them both at the same time, - # striking out `cabal test`. Instead we rely on system packages and invoke - # `ghc --make` manually, and even so `ghc -O` is prohibitively expensive. - emulated: - runs-on: ubuntu-latest - strategy: - fail-fast: true - matrix: - arch: ['s390x', 'ppc64le'] - steps: - - uses: actions/checkout@v4 - - uses: uraimo/run-on-arch-action@v2.8.1 - timeout-minutes: 60 - with: - arch: ${{ matrix.arch }} - distro: ubuntu_rolling - githubToken: ${{ github.token }} - install: | - apt-get update -y - apt-get install -y curl ghc libghc-tasty-quickcheck-dev libghc-syb-dev - run: | - ghc --version - ghc --make -itests:tests/bytestring-tests -o Main tests/bytestring-tests/Main.hs +RTS -s - ./Main +RTS -s - ghc --make -itests:tests/encoding -o Main tests/encoding/Main.hs +RTS -s - ./Main +RTS -s From 43843aa2ae2b8f2dfb439d92f0c0a87cfc2298b2 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sat, 4 Jan 2025 18:22:13 +0800 Subject: [PATCH 10/17] Skip bench on armv7 --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 470e666..4ec94c7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -82,7 +82,7 @@ jobs: uses: docker://hasufell/arm32v7-ubuntu-haskell:focal name: Run build (arm32v7 linux) with: - args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2" - if: matrix.arch == 'arm64v8' uses: docker://hasufell/arm64v8-ubuntu-haskell:focal From 0545e47878123211916f1fd9ae4d8f209b32b2a9 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 5 Jan 2025 17:59:32 +0800 Subject: [PATCH 11/17] Improve CI --- .github/workflows/test.yaml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4ec94c7..8e8fb2f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -13,17 +13,15 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest] - ghc: ['8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8'] + os: [ubuntu-latest, macOS-13, windows-latest] + ghc: ['9.4', '9.6', '9.8', '9.10', '9.12'] include: - - os: macOS-13 - ghc: '9.4' - - os: macOS-13 - ghc: '9.6' - - os: windows-latest - ghc: '9.4' - - os: windows-latest - ghc: '9.6' + - os: ubuntu-latest + ghc: '8.10' + - os: ubuntu-latest + ghc: '9.0' + - os: ubuntu-latest + ghc: '9.2' steps: - uses: actions/checkout@v4 @@ -88,7 +86,7 @@ jobs: uses: docker://hasufell/arm64v8-ubuntu-haskell:focal name: Run build (arm64v8 linux) with: - args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2" darwin_arm: runs-on: ${{ matrix.os }} From 2110c654eeffcc7782dac50633b8c27122d0528a Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 5 Jan 2025 18:20:19 +0800 Subject: [PATCH 12/17] Fix tests on 9.12 --- tests/encoding/EncodingSpec.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/encoding/EncodingSpec.hs b/tests/encoding/EncodingSpec.hs index 60420c1..6e30877 100644 --- a/tests/encoding/EncodingSpec.hs +++ b/tests/encoding/EncodingSpec.hs @@ -40,7 +40,9 @@ tests = let str = [toEnum 55296, toEnum 55297] encoded = encodeWithTE utf16le str decoded = decodeWithTE utf16le =<< encoded -#if __GLASGOW_HASKELL__ >= 910 +#if __GLASGOW_HASKELL__ >= 912 + in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) +#elif __GLASGOW_HASKELL__ >= 910 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing)) #elif __GLASGOW_HASKELL__ >= 904 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) @@ -72,7 +74,9 @@ tests = let str = [toEnum 0xDFF0, toEnum 0xDFF2] encoded = encodeWithTE (mkUTF8 RoundtripFailure) str decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded -#if __GLASGOW_HASKELL__ >= 910 +#if __GLASGOW_HASKELL__ >= 912 + in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) +#elif __GLASGOW_HASKELL__ >= 910 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing)) #elif __GLASGOW_HASKELL__ >= 904 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) From 28ce254235d28c57e1470dcbab0f91a4646f3274 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Thu, 6 Mar 2025 13:56:41 +0800 Subject: [PATCH 13/17] Fox System.OsString.length documentation --- System/OsString/Internal.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index 7f3d284..191495b 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -304,6 +304,10 @@ null = coerce PF.null -- | /O(1)/ The length of a 'OsString'. -- +-- This returns the number of code units +-- (@Word8@ on unix and @Word16@ on windows), not +-- bytes. +-- -- @since 1.4.200.0 length :: OsString -> Int length = coerce PF.length From 2e693aad07540173a0169971b27c9acac28eeff1 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Wed, 11 Jun 2025 17:15:14 -0400 Subject: [PATCH 14/17] Bump base upper bound to <4.23 Allowing GHC 9.14. --- os-string.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/os-string.cabal b/os-string.cabal index 8672c27..9543c61 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -64,7 +64,7 @@ library default-language: Haskell2010 build-depends: - , base >=4.12.0.0 && <4.22 + , base >=4.12.0.0 && <4.23 , bytestring >=0.11.3.0 , deepseq , exceptions From 9584e80920d83ab0d65473f80bdf0580f5063f41 Mon Sep 17 00:00:00 2001 From: maurges Date: Wed, 6 Aug 2025 15:38:53 +0200 Subject: [PATCH 15/17] Add conversions from ShortByteString --- System/OsString/Common.hs | 37 +++++++++++++++++++++++++++++++++++++ System/OsString/Internal.hs | 12 ++++++++++++ 2 files changed, 49 insertions(+) diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index c8a2780..26ef0b0 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -38,8 +38,10 @@ module System.OsString.MODULE_NAME , fromString #endif , fromBytes + , fromShortBytes #ifndef WINDOWS , fromBytestring + , fromShortBytestring #endif , pstr , singleton @@ -158,6 +160,8 @@ import Control.Monad.Catch ( MonadThrow, throwM ) import Data.ByteString.Internal ( ByteString ) +import Data.ByteString.Short.Internal + ( ShortByteString ) import Control.Exception ( SomeException, try, displayException ) import Control.DeepSeq ( force ) @@ -426,6 +430,33 @@ fromBytes bs = fromBytes = pure . PosixString . BSP.toShort #endif +#ifdef WINDOWS_DOC +-- | Constructs a platform string from a ShortByteString. +-- +-- This ensures valid UCS-2LE. +-- Note that this doesn't expand Word8 to Word16 on windows, so you may get invalid UTF-16. +-- +-- Throws 'EncodingException' on invalid UCS-2LE (although unlikely). +-- +-- @since 2.0.8 +#else +-- | Constructs a platform string from a ShortByteString. +-- +-- This is a no-op. +-- +-- @since 2.0.8 +#endif +fromShortBytes :: MonadThrow m + => ShortByteString + -> m PLATFORM_STRING +#ifdef WINDOWS +fromShortBytes bs = + let ws = WindowsString bs + in either throwM (const . pure $ ws) $ decodeWith ucs2le ws +#else +fromShortBytes = pure . PosixString +#endif + #ifndef WINDOWS -- | Like 'fromBytes', but not in IO. -- @@ -438,6 +469,12 @@ fromBytes = pure . PosixString . BSP.toShort -- @since 2.0.6 fromBytestring :: ByteString -> PosixString fromBytestring = PosixString . BSP.toShort + +-- | Like 'fromShortBytes', but not in IO, similarly to 'fromBytestring' +-- +-- @since 2.0.8 +fromShortBytestring :: ShortByteString -> PosixString +fromShortBytestring = PosixString #endif diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index 191495b..f1e42be 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -14,6 +14,8 @@ import Control.Monad.Catch ( MonadThrow ) import Data.ByteString ( ByteString ) +import Data.ByteString.Short + ( ShortByteString ) import Data.Char import Language.Haskell.TH.Quote ( QuasiQuoter (..) ) @@ -170,6 +172,16 @@ fromBytes :: MonadThrow m -> m OsString fromBytes = fmap OsString . PF.fromBytes +-- | Constructs an @OsString@ from a ShortByteString. +-- +-- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. +-- +-- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). +fromShortBytes :: MonadThrow m + => ShortByteString + -> m OsString +fromShortBytes = fmap OsString . PF.fromShortBytes + -- | QuasiQuote an 'OsString'. This accepts Unicode characters -- and encodes as UTF-8 on unix and UTF-16 on windows. From 50bfdffa8f87d56b031cd3d82202ceb623e533f8 Mon Sep 17 00:00:00 2001 From: maurges Date: Wed, 6 Aug 2025 17:40:48 +0200 Subject: [PATCH 16/17] Express fromBytes in terms of fromShortBytes --- System/OsString/Common.hs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 26ef0b0..14fd4d9 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -422,13 +422,7 @@ decodeFS (PosixString ba) = decodeWithBasePosix ba fromBytes :: MonadThrow m => ByteString -> m PLATFORM_STRING -#ifdef WINDOWS -fromBytes bs = - let ws = WindowsString . BS16.toShort $ bs - in either throwM (const . pure $ ws) $ decodeWith ucs2le ws -#else -fromBytes = pure . PosixString . BSP.toShort -#endif +fromBytes = fromShortBytes . BS16.toShort #ifdef WINDOWS_DOC -- | Constructs a platform string from a ShortByteString. From c08666bf7bf528e607fc1eacc20032ec59e69df3 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Thu, 7 Aug 2025 15:16:59 +0800 Subject: [PATCH 17/17] Release 2.0.8 --- changelog.md | 4 ++++ os-string.cabal | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 55317f4..5326d41 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string) +## 2.0.8 *Aug 2025* + +* Add safe conversions from ShortByteString [#34](https://github.com/haskell/os-string/pull/34) + ## 2.0.7 *Nov 2024* * don't catch asynchronous exceptions during encoding/decoding wrt [#22](https://github.com/haskell/os-string/issues/22) diff --git a/os-string.cabal b/os-string.cabal index 9543c61..6c114d4 100644 --- a/os-string.cabal +++ b/os-string.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: os-string -version: 2.0.7 +version: 2.0.8 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause