diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index fcf6c16722..48d6b30540 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -201,7 +201,7 @@ toarr = Array.fromList . map (fromIntegral . ord) fileInfixTakeEndBy_ :: Handle -> IO Int fileInfixTakeEndBy_ inh = Stream.fold Fold.length - $ Stream.foldMany1 (FL.takeEndBy_ (== lf) Fold.drain) + $ Stream.foldManyPost (FL.takeEndBy_ (== lf) Fold.drain) $ Handle.read inh -- >>= print #ifdef INSPECTION @@ -256,7 +256,7 @@ inspect $ 'fileSuffixTakeEndBy `hasNoType` ''MutArray.ArrayUnsafe -- FH.read/A. splitOnSeq :: String -> Handle -> IO Int splitOnSeq str inh = Stream.fold Fold.length - $ Stream.foldMany1 (Fold.takeEndBySeq_ (toarr str) Fold.drain) + $ Stream.foldManyPost (Fold.takeEndBySeq_ (toarr str) Fold.drain) $ Handle.read inh -- >>= print #ifdef INSPECTION @@ -269,7 +269,7 @@ splitOnSeq100k :: Handle -> IO Int splitOnSeq100k inh = do arr <- Stream.fold Array.create $ Stream.replicate 100000 123 Stream.fold Fold.length - $ Stream.foldMany1 (Fold.takeEndBySeq_ arr Fold.drain) + $ Stream.foldManyPost (Fold.takeEndBySeq_ arr Fold.drain) $ Handle.read inh -- >>= print -- | Split on suffix sequence. 
@@ -356,7 +356,7 @@ o_1_space_reduce_read_split env = splitOnSeqUtf8 :: String -> Handle -> IO Int splitOnSeqUtf8 str inh = Stream.fold Fold.length - $ Stream.foldMany1 (Fold.takeEndBySeq_ (Array.fromList str) Fold.drain) + $ Stream.foldManyPost (Fold.takeEndBySeq_ (Array.fromList str) Fold.drain) $ Unicode.decodeUtf8Chunks $ Handle.readChunks inh -- >>= print diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs index 4cecf2da8a..a798f2c087 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs @@ -176,7 +176,7 @@ inspect $ 'concatMapRepl `hasNoType` ''SPEC unfoldManyRepl :: Int -> Int -> Int -> IO () unfoldManyRepl outer inner n = drain - $ S.unfoldMany + $ S.unfoldEach UF.replicateM (fmap ((inner,) . return) (sourceUnfoldrM outer n)) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs index 52787dbd7d..9009b8bfe2 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs @@ -140,7 +140,7 @@ foldMany1 :: Monad m => Stream m Int -> m () foldMany1 = Common.drain . fmap getSum - . S.foldMany1 (FL.take 2 FL.mconcat) + . S.foldManyPost (FL.take 2 FL.mconcat) . fmap Sum {-# INLINE refoldMany #-} diff --git a/benchmark/Streamly/Benchmark/Data/Stream/StreamD.hs b/benchmark/Streamly/Benchmark/Data/Stream/StreamD.hs index f020ad25fb..2040a4ccbf 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/StreamD.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/StreamD.hs @@ -448,7 +448,7 @@ inspect $ 'concatMapRepl `hasNoType` ''SPEC unfoldManyRepl :: Int -> Int -> Int -> IO () unfoldManyRepl outer inner n = S.drain - $ S.unfoldMany + $ S.unfoldEach UF.replicateM (S.map ((inner,) . 
return) (sourceUnfoldrMN outer n)) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs index 3df4fe2b38..ddb7a84a28 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs @@ -514,12 +514,12 @@ insertBy value n = composeN n $ Stream.insertBy compare (value + 1) {-# INLINE interposeSuffix #-} interposeSuffix :: Monad m => Int -> Int -> Stream m Int -> m () interposeSuffix value n = - composeN n $ Stream.interposeSuffix (value + 1) Unfold.identity + composeN n $ Stream.unfoldEachEndBy (value + 1) Unfold.identity {-# INLINE intercalateSuffix #-} intercalateSuffix :: Monad m => Int -> Int -> Stream m Int -> m () intercalateSuffix value n = - composeN n $ Stream.intercalateSuffix Unfold.identity (value + 1) + composeN n $ Stream.unfoldEachEndBySeq (value + 1) Unfold.identity o_1_space_inserting :: Int -> [Benchmark] o_1_space_inserting value = diff --git a/benchmark/Streamly/Benchmark/Data/Unfold.hs b/benchmark/Streamly/Benchmark/Data/Unfold.hs index 4273c18efa..57acd20585 100644 --- a/benchmark/Streamly/Benchmark/Data/Unfold.hs +++ b/benchmark/Streamly/Benchmark/Data/Unfold.hs @@ -605,7 +605,7 @@ concatCount linearCount = many :: Monad m => Int -> Int -> m () many linearCount start = do let end = start + concatCount linearCount - UF.fold FL.drain (UF.many (source end) (source end)) start + UF.fold FL.drain (UF.unfoldEach (source end) (source end)) start ------------------------------------------------------------------------------- -- Benchmarks diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 0e47be3e23..880fc08da3 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -219,7 +219,7 @@ chunksOfSum n inh = foldMany1ChunksOfSum :: Int -> Handle -> IO Int foldMany1ChunksOfSum
n inh = S.fold Fold.length - $ IP.foldMany1 (FL.take n FL.sum) (S.unfold FH.reader inh) + $ IP.foldManyPost (FL.take n FL.sum) (S.unfold FH.reader inh) foldManyChunksOfSum :: Int -> Handle -> IO Int foldManyChunksOfSum n inh = diff --git a/core/src/Streamly/Data/Stream.hs b/core/src/Streamly/Data/Stream.hs index 1a31365b9f..c18849dc61 100644 --- a/core/src/Streamly/Data/Stream.hs +++ b/core/src/Streamly/Data/Stream.hs @@ -422,6 +422,7 @@ module Streamly.Data.Stream -- >>> elemIndices a = findIndices (== a) -- >>> uniq = Stream.scanMaybe (Fold.uniqBy (==)) -- >>> partition p = Stream.fold (Fold.partition Fold.toList Fold.toList) . fmap (if p then Left else Right) + -- >>> takeLast n s = Stream.fromEffect $ fmap Array.read $ Array.createOfLast n s -- , scanlMaybe , take , takeWhile @@ -505,15 +506,15 @@ module Streamly.Data.Stream -- * Unfold Each -- Idioms and equivalents of Data.List APIs: -- - -- >>> cycle = Stream.unfoldMany Unfold.fromList . Stream.repeat - -- >>> unlines = Stream.interposeSuffix '\n' - -- >>> unwords = Stream.interpose ' ' - -- >>> unlines = Stream.intercalateSuffix Unfold.fromList "\n" - -- >>> unwords = Stream.intercalate Unfold.fromList " " + -- >>> cycle = Stream.unfoldEach Unfold.fromList . 
Stream.repeat + -- >>> unlines = Stream.unfoldEachEndBy '\n' + -- >>> unwords = Stream.unfoldEachSepBy ' ' + -- >>> unlines = Stream.unfoldEachEndBySeq "\n" Unfold.fromList + -- >>> unwords = Stream.unfoldEachSepBySeq " " Unfold.fromList -- - , unfoldMany - , intercalate - , intercalateSuffix + , unfoldEach + , unfoldEachSepBySeq + , unfoldEachEndBySeq -- * Stream of streams -- | Stream operations like map and filter represent loops in @@ -550,7 +551,6 @@ module Streamly.Data.Stream -- >>> groupsByRolling eq = Stream.parseMany (Parser.groupByRolling eq Fold.toList) -- >>> groups = groupBy (==) , foldMany - , foldMany1 , groupsOf , parseMany @@ -564,8 +564,8 @@ module Streamly.Data.Stream -- >>> splitAt n = Stream.fold (Fold.splitAt n Fold.toList Fold.toList) -- >>> span p = Parser.splitWith (,) (Parser.takeWhile p Fold.toList) (Parser.fromFold Fold.toList) -- >>> break p = span (not . p) - , splitOn - , splitOnSeq + , splitSepBy_ + , splitSepBySeq_ , splitEndBySeq , splitEndBySeq_ , wordsBy @@ -674,6 +674,11 @@ module Streamly.Data.Stream , scan , scanMaybe , postscan + , splitOn + , splitOnSeq + , unfoldMany + , intercalate + , intercalateSuffix ) where diff --git a/core/src/Streamly/Data/Unfold.hs b/core/src/Streamly/Data/Unfold.hs index c7a4bb3e33..aaab70fe3d 100644 --- a/core/src/Streamly/Data/Unfold.hs +++ b/core/src/Streamly/Data/Unfold.hs @@ -11,7 +11,7 @@ -- Fast, composable stream producers with ability to terminate, supporting -- nested stream fusion. Nested stream operations like -- 'Streamly.Data.Stream.concatMap' in the "Streamly.Data.Stream" module do not --- fuse, however, the 'Streamly.Data.Stream.unfoldMany' operation, using the +-- fuse, however, the 'Streamly.Data.Stream.unfoldEach' operation, using the -- 'Unfold' type, is a fully fusible alternative to -- 'Streamly.Data.Stream.concatMap'. 
-- @@ -88,6 +88,9 @@ module Streamly.Data.Unfold , crossWith -- ** Nesting + , unfoldEach + + -- * Deprecated , many ) diff --git a/core/src/Streamly/Internal/Console/Stdio.hs b/core/src/Streamly/Internal/Console/Stdio.hs index 9f7ba66fda..eaeb88e6af 100644 --- a/core/src/Streamly/Internal/Console/Stdio.hs +++ b/core/src/Streamly/Internal/Console/Stdio.hs @@ -53,7 +53,6 @@ import Streamly.Internal.Data.Fold (Fold) import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Stream as Stream - (intersperseMSuffix) import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.FileSystem.Handle as Handle import qualified Streamly.Internal.Unicode.Stream as Unicode @@ -224,5 +223,5 @@ putStrings = putStringsWith Unicode.encodeUtf8 putStringsLn :: MonadIO m => Stream m String -> m () putStringsLn = putChunks - . Stream.intersperseMSuffix (return $ Array.fromList [10]) + . Stream.intersperseEndByM (return $ Array.fromList [10]) . Unicode.encodeStrings Unicode.encodeUtf8 diff --git a/core/src/Streamly/Internal/Data/Array.hs b/core/src/Streamly/Internal/Data/Array.hs index 5a52bcb5c4..ddf698cce6 100644 --- a/core/src/Streamly/Internal/Data/Array.hs +++ b/core/src/Streamly/Internal/Data/Array.hs @@ -57,7 +57,7 @@ module Streamly.Internal.Data.Array -- , getSlice , sliceIndexerFromLen , slicerFromLen - , splitOn + , splitOn -- XXX slicesEndBy -- * Streaming Operations , streamTransform @@ -67,17 +67,15 @@ module Streamly.Internal.Data.Array , fold -- * Stream of Arrays + , concatSepBy + , concatEndBy + , concatEndBySeq - -- XXX these are probably not very useful to have in this module as we can - -- express these idiomatically using streams. 
- , interpose - , interposeSuffix - , intercalateSuffix - - , compactLE - , pinnedCompactLE - , compactOnByte - , compactOnByteSuffix + , compactMax + , compactMax' + , compactSepByByte_ + , compactEndByByte_ + , compactEndByLn_ , foldBreakChunks , foldChunks @@ -95,10 +93,18 @@ module Streamly.Internal.Data.Array , getSlicesFromLen , getIndices , writeLastN + , interpose + , interposeSuffix + , intercalateSuffix + , compactLE + , pinnedCompactLE + , compactOnByte + , compactOnByteSuffix ) where #include "assert.hs" +#include "deprecation.h" #include "inline.hs" #include "ArrayMacros.h" @@ -572,11 +578,14 @@ deserialize arr@(Array {..}) = unsafeInlineIO $ do -- | Insert the given element between arrays and flatten. -- --- >>> interpose x = Stream.interpose x Array.reader +-- >>> concatSepBy x = Stream.unfoldEachSepBy x Array.reader -- -{-# INLINE interpose #-} -interpose :: (Monad m, Unbox a) => a -> Stream m (Array a) -> Stream m a -interpose x = D.interpose x reader +{-# INLINE concatSepBy #-} +concatSepBy, interpose :: (Monad m, Unbox a) => + a -> Stream m (Array a) -> Stream m a +concatSepBy x = D.unfoldEachSepBy x reader + +RENAME(interpose,concatSepBy) data FlattenState s = OuterLoop s @@ -585,13 +594,13 @@ data FlattenState s = -- | Insert the given element after each array and flatten. This is similar to -- unlines. -- --- >>> interposeSuffix x = Stream.interposeSuffix x Array.reader +-- >>> concatEndBy x = Stream.unfoldEachEndBy x Array.reader -- -{-# INLINE_NORMAL interposeSuffix #-} -interposeSuffix :: forall m a. (Monad m, Unbox a) +{-# INLINE_NORMAL concatEndBy #-} +concatEndBy, interposeSuffix :: forall m a. 
(Monad m, Unbox a) => a -> Stream m (Array a) -> Stream m a --- interposeSuffix x = D.interposeSuffix x reader -interposeSuffix sep (D.Stream step state) = D.Stream step' (OuterLoop state) +-- concatEndBy x = D.unfoldEachEndBy x reader +concatEndBy sep (D.Stream step state) = D.Stream step' (OuterLoop state) where @@ -611,55 +620,81 @@ interposeSuffix sep (D.Stream step state) = D.Stream step' (OuterLoop state) let !x = unsafeInlineIO $ peekAt p contents return $ D.Yield x (InnerLoop st contents (INDEX_NEXT(p,a)) end) +RENAME(interposeSuffix,concatEndBy) + -- | Insert the given array after each array and flatten. -- --- >>> intercalateSuffix = Stream.intercalateSuffix Array.reader +-- >>> concatEndBySeq x = Stream.unfoldEachEndBySeq x Array.reader -- -{-# INLINE intercalateSuffix #-} -intercalateSuffix :: (Monad m, Unbox a) +{-# INLINE concatEndBySeq #-} +concatEndBySeq, intercalateSuffix :: (Monad m, Unbox a) => Array a -> Stream m (Array a) -> Stream m a -intercalateSuffix = D.intercalateSuffix reader +concatEndBySeq x = D.unfoldEachEndBySeq x reader + +RENAME(intercalateSuffix,concatEndBySeq) --- | @compactLE n@ coalesces adjacent arrays in the input stream +-- | @compactMax n@ coalesces adjacent arrays in the input stream -- only if the combined size would be less than or equal to n. -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. -{-# INLINE_NORMAL compactLE #-} -compactLE :: (MonadIO m, Unbox a) +{-# INLINE_NORMAL compactMax #-} +compactMax, compactLE :: (MonadIO m, Unbox a) => Int -> Stream m (Array a) -> Stream m (Array a) -compactLE n stream = - D.map unsafeFreeze $ MA.compactLE n $ D.map unsafeThaw stream +compactMax n stream = + D.map unsafeFreeze $ MA.compactMax n $ D.map unsafeThaw stream + +RENAME(compactLE,compactMax) --- | Pinned version of 'compactLE'. -{-# INLINE_NORMAL pinnedCompactLE #-} -pinnedCompactLE :: (MonadIO m, Unbox a) +-- | Like 'compactMax' but generates pinned arrays. 
+{-# INLINE_NORMAL compactMax' #-} +compactMax', pinnedCompactLE :: (MonadIO m, Unbox a) => Int -> Stream m (Array a) -> Stream m (Array a) -pinnedCompactLE n stream = - D.map unsafeFreeze $ MA.pinnedCompactLE n $ D.map unsafeThaw stream +compactMax' n stream = + D.map unsafeFreeze $ MA.compactMax' n $ D.map unsafeThaw stream --- | Split a stream of arrays on a given separator byte, dropping the separator --- and coalescing all the arrays between two separators into a single array. +{-# DEPRECATED pinnedCompactLE "Please use compactMax' instead." #-} +{-# INLINE pinnedCompactLE #-} +pinnedCompactLE = compactMax' + +-- | Split a stream of byte arrays on a given separator byte, dropping the +-- separator and coalescing all the arrays between two separators into a single +-- array. -- -{-# INLINE compactOnByte #-} -compactOnByte +{-# INLINE compactSepByByte_ #-} +compactSepByByte_, compactOnByte :: (MonadIO m) => Word8 -> Stream m (Array Word8) -> Stream m (Array Word8) -compactOnByte byte = - fmap unsafeFreeze . MA.compactOnByte byte . fmap unsafeThaw +compactSepByByte_ byte = + fmap unsafeFreeze . MA.compactSepByByte_ byte . fmap unsafeThaw + +RENAME(compactOnByte,compactSepByByte_) --- | Like 'compactOnByte' considers the separator in suffix position instead of --- infix position. -{-# INLINE compactOnByteSuffix #-} -compactOnByteSuffix +-- | Like 'compactSepByByte_', but considers the separator in suffix position +-- instead of infix position. +{-# INLINE compactEndByByte_ #-} +compactEndByByte_, compactOnByteSuffix :: (MonadIO m) => Word8 -> Stream m (Array Word8) -> Stream m (Array Word8) -compactOnByteSuffix byte = - fmap unsafeFreeze . MA.compactOnByteSuffix byte . fmap unsafeThaw +compactEndByByte_ byte = + fmap unsafeFreeze . MA.compactEndByByte_ byte . fmap unsafeThaw +-- compactEndByByte_ byte = chunksEndBy_ (== byte) . concat + +RENAME(compactOnByteSuffix,compactEndByByte_) + +-- XXX On windows we should compact on "\r\n". 
We can just compact on '\n' and +-- drop the last byte in each array if it is '\r'. + +-- | Compact byte arrays on newline character, dropping the newline char. +{-# INLINE compactEndByLn_ #-} +compactEndByLn_ :: MonadIO m + => Stream m (Array Word8) + -> Stream m (Array Word8) +compactEndByLn_ = compactEndByByte_ 10 ------------------------------------------------------------------------------- -- Folding Streams of Arrays @@ -709,11 +744,11 @@ foldBreakChunks (Fold fstep initial _ final) stream@(Stream step state) = do -- | Fold a stream of arrays using a 'Fold'. This is equivalent to the -- following: -- --- >>> foldChunks f = Stream.fold f . Stream.unfoldMany Array.reader +-- >>> foldChunks f = Stream.fold f . Stream.unfoldEach Array.reader -- foldChunks :: (MonadIO m, Unbox a) => Fold m a b -> Stream m (Array a) -> m b foldChunks f s = fmap fst (foldBreakChunks f s) --- foldStream f = Stream.fold f . Stream.unfoldMany reader +-- foldStream f = Stream.fold f . Stream.unfoldEach reader -- | Fold a stream of arrays using a 'Fold' and return the remaining stream. -- diff --git a/core/src/Streamly/Internal/Data/Array/Type.hs b/core/src/Streamly/Internal/Data/Array/Type.hs index 167d5d7b9e..40cc0b9ef8 100644 --- a/core/src/Streamly/Internal/Data/Array/Type.hs +++ b/core/src/Streamly/Internal/Data/Array/Type.hs @@ -3,6 +3,7 @@ -- Must come after TypeFamilies, otherwise it is re-enabled. -- MonoLocalBinds enabled by TypeFamilies causes perf regressions in general. {-# LANGUAGE NoMonoLocalBinds #-} +{-# OPTIONS_GHC -Wno-deprecations #-} -- | -- Module : Streamly.Internal.Data.Array.Type -- Copyright : (c) 2020 Composewell Technologies @@ -112,6 +113,10 @@ module Streamly.Internal.Data.Array.Type , chunksOf , pinnedChunksOf , buildChunks + , chunksEndBy + , chunksEndBy' + , chunksEndByLn + , chunksEndByLn' -- *** Split -- | Split an array into slices. 
@@ -123,11 +128,11 @@ module Streamly.Internal.Data.Array.Type -- *** Compact -- | Append the arrays in a stream to form a stream of larger arrays. - , fCompactGE - , fPinnedCompactGE - , lCompactGE - , lPinnedCompactGE - , compactGE + , createCompactMin + , createCompactMin' + , scanCompactMin + , scanCompactMin' + , compactMin -- ** Deprecated , asPtrUnsafe @@ -153,15 +158,22 @@ module Streamly.Internal.Data.Array.Type , pinnedWrite , fromByteStr# , fromByteStr + , fCompactGE + , fPinnedCompactGE + , lCompactGE + , lPinnedCompactGE + , compactGE ) where #include "ArrayMacros.h" +#include "deprecation.h" #include "inline.hs" import Control.Exception (assert) import Control.Monad (replicateM, when) import Control.Monad.IO.Class (MonadIO(..)) +import Data.Char (ord) import Data.Functor.Identity (Identity(..)) import Data.Int (Int8, Int16, Int32, Int64) import Data.Proxy (Proxy(..)) @@ -176,6 +188,7 @@ import Streamly.Internal.Data.Producer.Type (Producer(..)) import Streamly.Internal.Data.MutArray.Type (MutArray(..)) import Streamly.Internal.Data.MutByteArray.Type (MutByteArray) import Streamly.Internal.Data.Fold.Type (Fold(..)) +import Streamly.Internal.Data.Scanl.Type (Scanl (..)) import Streamly.Internal.Data.Stream.Type (Stream) import Streamly.Internal.Data.StreamK.Type (StreamK) import Streamly.Internal.Data.Unbox (Unbox(..)) @@ -191,6 +204,7 @@ import qualified Streamly.Internal.Data.Stream.Type as D import qualified Streamly.Internal.Data.StreamK.Type as K import qualified Streamly.Internal.Data.MutByteArray.Type as Unboxed import qualified Streamly.Internal.Data.Producer as Producer +import qualified Streamly.Internal.Data.Scanl.Type as Scanl import qualified Streamly.Internal.Data.Unfold.Type as Unfold import qualified Text.ParserCombinators.ReadPrec as ReadPrec @@ -479,6 +493,38 @@ pinnedChunksOf :: forall m a. 
(MonadIO m, Unbox a) => Int -> D.Stream m a -> D.Stream m (Array a) pinnedChunksOf n str = D.map unsafeFreeze $ MA.pinnedChunksOf n str +-- | Create arrays from the input stream using a predicate to find the end of +-- the chunk. When the predicate matches, the chunk ends, the matching element +-- is included in the chunk. +-- +-- Definition: +-- +-- >>> chunksEndBy p = Stream.foldMany (Fold.takeEndBy p Array.create) +-- +{-# INLINE chunksEndBy #-} +chunksEndBy :: forall m a. (MonadIO m, Unbox a) + => (a -> Bool) -> D.Stream m a -> D.Stream m (Array a) +chunksEndBy p = D.foldMany (Fold.takeEndBy p create) + +-- | Like 'chunksEndBy' but creates pinned arrays. +-- +{-# INLINE chunksEndBy' #-} +chunksEndBy' :: forall m a. (MonadIO m, Unbox a) + => (a -> Bool) -> D.Stream m a -> D.Stream m (Array a) +chunksEndBy' p = D.foldMany (Fold.takeEndBy p pinnedCreate) + +-- | Create chunks using newline as the separator, including it. +{-# INLINE chunksEndByLn #-} +chunksEndByLn :: (MonadIO m) + => D.Stream m Word8 -> D.Stream m (Array Word8) +chunksEndByLn = chunksEndBy (== fromIntegral (ord '\n')) + +-- | Like 'chunksEndByLn' but creates pinned arrays. +{-# INLINE chunksEndByLn' #-} +chunksEndByLn' :: (MonadIO m) + => D.Stream m Word8 -> D.Stream m (Array Word8) +chunksEndByLn' = chunksEndBy' (== fromIntegral (ord '\n')) + -- | Convert a stream of arrays into a stream of their elements. -- -- >>> concat = Stream.unfoldMany Array.reader @@ -522,41 +568,53 @@ flattenArraysRev = concatRev -- arrays would have no capacity to append, therefore, a copy will be forced -- anyway. --- | Fold @fCompactGE n@ coalesces adjacent arrays in the input stream --- until the size becomes greater than or equal to n. +-- | Fold @createCompactMin n@ coalesces adjacent arrays in the input +-- stream until the size becomes greater than or equal to n. -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays.
-{-# INLINE_NORMAL fCompactGE #-} -fCompactGE :: (MonadIO m, Unbox a) => Int -> Fold m (Array a) (Array a) -fCompactGE n = fmap unsafeFreeze $ Fold.lmap unsafeThaw $ MA.fCompactGE n - --- | PInned version of 'fCompactGE'. -{-# INLINE_NORMAL fPinnedCompactGE #-} -fPinnedCompactGE :: (MonadIO m, Unbox a) => Int -> Fold m (Array a) (Array a) -fPinnedCompactGE n = - fmap unsafeFreeze $ Fold.lmap unsafeThaw $ MA.fPinnedCompactGE n - --- | @compactGE n stream@ coalesces adjacent arrays in the @stream@ until +{-# INLINE_NORMAL createCompactMin #-} +createCompactMin, fCompactGE :: (MonadIO m, Unbox a) => + Int -> Fold m (Array a) (Array a) +createCompactMin n = + fmap unsafeFreeze $ Fold.lmap unsafeThaw $ MA.createCompactMin n + +RENAME(fCompactGE,createCompactMin) + +-- | Pinned version of 'createCompactMin'. +{-# INLINE_NORMAL createCompactMin' #-} +createCompactMin', fPinnedCompactGE :: (MonadIO m, Unbox a) => + Int -> Fold m (Array a) (Array a) +createCompactMin' n = + fmap unsafeFreeze $ Fold.lmap unsafeThaw $ MA.createCompactMin' n + +{-# DEPRECATED fPinnedCompactGE "Please use createCompactMin' instead." #-} +{-# INLINE fPinnedCompactGE #-} +fPinnedCompactGE = createCompactMin' + +-- | @compactMin n stream@ coalesces adjacent arrays in the @stream@ until -- the size becomes greater than or equal to @n@. -- --- >>> compactGE n = Stream.foldMany (Array.fCompactGE n) +-- >>> compactMin n = Stream.foldMany (Array.createCompactMin n) -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. -{-# INLINE compactGE #-} -compactGE :: +{-# INLINE compactMin #-} +compactMin, compactGE :: (MonadIO m, Unbox a) => Int -> Stream m (Array a) -> Stream m (Array a) -compactGE n stream = - D.map unsafeFreeze $ MA.compactGE n $ D.map unsafeThaw stream +compactMin n stream = + D.map unsafeFreeze $ MA.compactMin n $ D.map unsafeThaw stream --- | Like 'compactGE' but for transforming folds instead of stream.
+RENAME(compactGE,compactMin) + +-- | Like 'compactMin' but for transforming folds instead of stream. -- --- >>> lCompactGE n = Fold.many (Array.fCompactGE n) +-- >>> lCompactGE n = Fold.many (Array.createCompactMin n) -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. +{-# DEPRECATED lCompactGE "Please use scanCompactMin instead." #-} {-# INLINE_NORMAL lCompactGE #-} lCompactGE :: (MonadIO m, Unbox a) => Int -> Fold m (Array a) () -> Fold m (Array a) () @@ -564,12 +622,29 @@ lCompactGE n fld = Fold.lmap unsafeThaw $ MA.lCompactGE n (Fold.lmap unsafeFreeze fld) -- | Pinned version of 'lCompactGE'. +{-# DEPRECATED lPinnedCompactGE "Please use scanCompactMin' instead." #-} {-# INLINE_NORMAL lPinnedCompactGE #-} lPinnedCompactGE :: (MonadIO m, Unbox a) => Int -> Fold m (Array a) () -> Fold m (Array a) () lPinnedCompactGE n fld = Fold.lmap unsafeThaw $ MA.lPinnedCompactGE n (Fold.lmap unsafeFreeze fld) +{-# INLINE scanCompactMin #-} +scanCompactMin :: forall m a. (MonadIO m, Unbox a) + => Int -> Scanl m (Array a) (Maybe (Array a)) +scanCompactMin n = + Scanl.lmap unsafeThaw + $ fmap (fmap unsafeFreeze) + $ MA.scanCompactMin n + +{-# INLINE scanCompactMin' #-} +scanCompactMin' :: forall m a.
(MonadIO m, Unbox a) + => Int -> Scanl m (Array a) (Maybe (Array a)) +scanCompactMin' n = + Scanl.lmap unsafeThaw + $ fmap (fmap unsafeFreeze) + $ MA.scanCompactMin' n + ------------------------------------------------------------------------------- -- Splitting ------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Data/MutArray.hs b/core/src/Streamly/Internal/Data/MutArray.hs index 774f21f9c7..3c33dd1ec7 100644 --- a/core/src/Streamly/Internal/Data/MutArray.hs +++ b/core/src/Streamly/Internal/Data/MutArray.hs @@ -20,10 +20,11 @@ module Streamly.Internal.Data.MutArray -- * MutArray module , sliceIndexerFromLen , slicerFromLen - , compactLE - , pinnedCompactLE - , compactOnByte - , compactOnByteSuffix + , compactMax + , compactMax' + , compactSepByByte_ + , compactEndByByte_ + , compactEndByLn_ -- * Unboxed IORef , module Streamly.Internal.Data.IORef.Unboxed @@ -37,10 +38,15 @@ module Streamly.Internal.Data.MutArray -- * Deprecated , genSlicesFromLen , getSlicesFromLen + , compactLE + , pinnedCompactLE + , compactOnByte + , compactOnByteSuffix ) where #include "assert.hs" +#include "deprecation.h" #include "inline.hs" #include "ArrayMacros.h" @@ -206,21 +212,27 @@ deserialize arr@(MutArray {..}) = do -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. -{-# INLINE compactLE #-} -compactLE :: (MonadIO m, Unbox a) => +{-# INLINE compactMax #-} +compactMax, compactLE :: (MonadIO m, Unbox a) => Int -> Stream m (MutArray a) -> Stream m (MutArray a) -- XXX compactLE can be moved to MutArray/Type if we are not using the parser -- to implement it. -compactLE = compactLeAs Unpinned +compactMax = compactLeAs Unpinned -- The parser version turns out to be a little bit slower. -- compactLE n = Stream.catRights . Stream.parseManyD (pCompactLE n) --- | Pinned version of 'compactLE'. -{-# INLINE pinnedCompactLE #-} -pinnedCompactLE :: forall m a. 
(MonadIO m, Unbox a) +RENAME(compactLE,compactMax) + +-- | Like 'compactMax' but generates pinned arrays. +{-# INLINE_NORMAL compactMax' #-} +compactMax', pinnedCompactLE :: forall m a. (MonadIO m, Unbox a) => Int -> Stream m (MutArray a) -> Stream m (MutArray a) -pinnedCompactLE = compactLeAs Pinned --- pinnedCompactLE n = Stream.catRights . Stream.parseManyD (pPinnedCompactLE n) +compactMax' = compactLeAs Pinned +-- compactMax' n = Stream.catRights . Stream.parseManyD (createCompactMax' n) + +{-# DEPRECATED pinnedCompactLE "Please use compactMax' instead." #-} +{-# INLINE pinnedCompactLE #-} +pinnedCompactLE = compactMax' data SplitState s arr = Initial s @@ -232,13 +244,13 @@ data SplitState s arr -- | Split a stream of arrays on a given separator byte, dropping the separator -- and coalescing all the arrays between two separators into a single array. -- -{-# INLINE_NORMAL _compactOnByteCustom #-} -_compactOnByteCustom +{-# INLINE_NORMAL _compactSepByByteCustom #-} +_compactSepByByteCustom :: MonadIO m => Word8 -> Stream m (MutArray Word8) -> Stream m (MutArray Word8) -_compactOnByteCustom byte (Stream.Stream step state) = +_compactSepByByteCustom byte (Stream.Stream step state) = Stream.Stream step' (Initial state) where @@ -280,14 +292,15 @@ _compactOnByteCustom byte (Stream.Stream step state) = step' _ (Yielding arr next) = return $ Stream.Yield arr next step' _ Finishing = return Stream.Stop --- XXX implement predicate based version of this --- XXX Naming of predicate based vs custom version +-- XXX implement predicate based version of this compactSepBy_, compactEndBy_ +-- XXX the versions that use equality can be named compactSepByElem_ etc. The +-- byte/word etc versions of that can be specialized using rewrite rules. -- | Split a stream of arrays on a given separator byte, dropping the separator -- and coalescing all the arrays between two separators into a single array.
-- -{-# INLINE compactOnByte #-} -compactOnByte +{-# INLINE compactSepByByte_ #-} +compactSepByByte_, compactOnByte :: (MonadIO m) => Word8 -> Stream m (MutArray Word8) @@ -295,17 +308,32 @@ compactOnByte -- XXX compare perf of custom vs idiomatic version -- compactOnByte = _compactOnByteCustom -- XXX use spliceExp and rightSize? -compactOnByte byte = Stream.splitInnerBy (breakOn byte) splice +compactSepByByte_ byte = Stream.splitInnerBy (breakOn byte) splice + +RENAME(compactOnByte,compactSepByByte_) --- | Like 'compactOnByte' considers the separator in suffix position instead of --- infix position. -{-# INLINE compactOnByteSuffix #-} -compactOnByteSuffix +-- | Like 'compactSepByByte_', but considers the separator in suffix position +-- instead of infix position. +-- +{-# INLINE compactEndByByte_ #-} +compactEndByByte_, compactOnByteSuffix :: (MonadIO m) => Word8 -> Stream m (MutArray Word8) -> Stream m (MutArray Word8) -compactOnByteSuffix byte = +compactEndByByte_ byte = -- XXX use spliceExp and rightSize? Stream.splitInnerBySuffix (\arr -> byteLength arr == 0) (breakOn byte) splice + +RENAME(compactOnByteSuffix,compactEndByByte_) + +-- XXX On windows we should compact on "\r\n". We can just compact on '\n' and +-- drop the last byte in each array if it is '\r'. + +-- | Compact byte arrays on newline character, dropping the newline char.
+{-# INLINE compactEndByLn_ #-} +compactEndByLn_ :: MonadIO m + => Stream m (MutArray Word8) + -> Stream m (MutArray Word8) +compactEndByLn_ = compactEndByByte_ 10 diff --git a/core/src/Streamly/Internal/Data/MutArray/Stream.hs b/core/src/Streamly/Internal/Data/MutArray/Stream.hs index 981067d97a..2970d63b19 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Stream.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Stream.hs @@ -1,3 +1,4 @@ +{-# OPTIONS_GHC -Wno-deprecations #-} -- | -- Module : Streamly.Internal.Data.MutArray.Stream -- Copyright : (c) 2019 Composewell Technologies diff --git a/core/src/Streamly/Internal/Data/MutArray/Type.hs b/core/src/Streamly/Internal/Data/MutArray/Type.hs index 0be0c3ea05..eb87aa3211 100644 --- a/core/src/Streamly/Internal/Data/MutArray/Type.hs +++ b/core/src/Streamly/Internal/Data/MutArray/Type.hs @@ -244,35 +244,52 @@ module Streamly.Internal.Data.MutArray.Type -- *** Chunk -- | Group a stream into arrays. , chunksOf - , pinnedChunksOf + , pinnedChunksOf -- chunksOf' + -- , timedChunksOf -- see the Streamly.Data.Stream.Prelude module , buildChunks + , chunksEndBy + , chunksEndBy' + , chunksEndByLn + , chunksEndByLn' + -- , chunksBeginBySeq -- for parsing streams with headers -- *** Split -- | Split an array into slices. -- , getSlicesFromLenN - , splitOn + , splitOn -- slicesEndBy -- , slicesOf -- *** Concat -- | Append the arrays in a stream to form a stream of elements. - , concatWith - , concatRevWith , concat + -- , concatSepBy + -- , concatEndBy + -- , concatEndByLn -- unlines - concat a byte chunk stream using newline byte separator + , concatWith -- internal , concatRev + , concatRevWith -- internal -- *** Compact -- | Append the arrays in a stream to form a stream of larger arrays. , SpliceState (..) 
- , pCompactLE - , pPinnedCompactLE - , compactLeAs - , fCompactGE - , fPinnedCompactGE - , lCompactGE - , lPinnedCompactGE - , compactGE - , compactEQ + , compactLeAs -- internal + + -- Creation folds/parsers + , createCompactMax + , createCompactMax' + , createCompactMin + , createCompactMin' + + -- Stream compaction + , compactMin + -- , compactMin' + , compactExact + -- , compactExact' + + -- Scans + , scanCompactMin + , scanCompactMin' -- ** Utilities , isPower2 @@ -327,6 +344,13 @@ module Streamly.Internal.Data.MutArray.Type , pinnedWrite , writeRevN , fromByteStr# + , pCompactLE + , pPinnedCompactLE + , fCompactGE + , fPinnedCompactGE + , lPinnedCompactGE + , lCompactGE + , compactGE ) where @@ -340,6 +364,7 @@ import Control.Monad (when) import Control.Monad.IO.Class (MonadIO(..)) import Data.Bifunctor (first) import Data.Bits (shiftR, (.|.), (.&.)) +import Data.Char (ord) import Data.Functor.Identity (Identity(..)) import Data.Proxy (Proxy(..)) import Data.Word (Word8, Word16) @@ -363,6 +388,7 @@ import GHC.Ptr (Ptr(..)) import Streamly.Internal.Data.Fold.Type (Fold(..)) import Streamly.Internal.Data.Producer.Type (Producer (..)) +import Streamly.Internal.Data.Scanl.Type (Scanl (..)) import Streamly.Internal.Data.Stream.Type (Stream) import Streamly.Internal.Data.Parser.Type (Parser (..)) import Streamly.Internal.Data.StreamK.Type (StreamK) @@ -1693,6 +1719,38 @@ pinnedChunksOf :: forall m a. (MonadIO m, Unbox a) -- pinnedChunksOf n = D.foldMany (pinnedCreateOf n) pinnedChunksOf = chunksOfAs Pinned +-- | Create arrays from the input stream using a predicate to find the end of +-- the chunk. When the predicate matches, the chunk ends, the matching element +-- is included in the chunk. +-- +-- Definition: +-- +-- >>> chunksEndBy p = Stream.foldMany (Fold.takeEndBy p MutArray.create) +-- +{-# INLINE chunksEndBy #-} +chunksEndBy :: forall m a. 
(MonadIO m, Unbox a) + => (a -> Bool) -> D.Stream m a -> D.Stream m (MutArray a) +chunksEndBy p = D.foldMany (FL.takeEndBy p create) + +-- | Like 'chunksEndBy' but creates pinned arrays. +-- +{-# INLINE chunksEndBy' #-} +chunksEndBy' :: forall m a. (MonadIO m, Unbox a) + => (a -> Bool) -> D.Stream m a -> D.Stream m (MutArray a) +chunksEndBy' p = D.foldMany (FL.takeEndBy p pinnedCreate) + +-- | Create chunks using newline as the separator, including it. +{-# INLINE chunksEndByLn #-} +chunksEndByLn :: (MonadIO m) + => D.Stream m Word8 -> D.Stream m (MutArray Word8) +chunksEndByLn = chunksEndBy (== fromIntegral (ord '\n')) + +-- | Like 'chunksEndByLn' but creates pinned arrays. +{-# INLINE chunksEndByLn' #-} +chunksEndByLn' :: (MonadIO m) + => D.Stream m Word8 -> D.Stream m (MutArray Word8) +chunksEndByLn' = chunksEndBy' (== fromIntegral (ord '\n')) + -- | When we are buffering a stream of unknown size into an array we do not -- know how much space to pre-allocate. So we start with the min size and emit -- the array then keep on doubling the size every time. Thus we do not need to @@ -3062,7 +3120,7 @@ byteEq arr1 arr2 = fmap (EQ ==) $ byteCmp arr1 arr2 -- Note: LE versions avoid an extra copy compared to GE. LE parser trades -- backtracking one array in lieu of avoiding a copy. However, LE and GE both --- can leave some memory unused. They can split the last array to fit it +-- can leave some memory unused. They may split the last array to fit it -- exactly in the space. {-# INLINE_NORMAL pCompactLeAs #-} @@ -3109,29 +3167,38 @@ pCompactLeAs ps maxElems = Parser step initial extract extract Nothing = return $ Parser.Done 0 nil extract (Just buf) = return $ Parser.Done 0 buf --- | Parser @pCompactLE maxElems@ coalesces adjacent arrays in the input stream --- only if the combined size would be less than or equal to @maxElems@ --- elements. Note that it won't split an array if the original array is already --- larger than maxElems. 
+-- | Parser @createCompactMax maxElems@ coalesces adjacent arrays in the +-- input stream only if the combined size would be less than or equal to +-- @maxElems@ elements. Note that it won't split an array if the original array +-- is already larger than maxElems. -- -- @maxElems@ must be greater than 0. -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. -- +-- Note that a fold compacting to less than or equal to a given size is not +-- possible, as folds cannot backtrack. +-- -- /Internal/ -{-# INLINE pCompactLE #-} -pCompactLE :: +{-# INLINE createCompactMax #-} +createCompactMax, pCompactLE :: forall m a. (MonadIO m, Unbox a) => Int -> Parser (MutArray a) m (MutArray a) -pCompactLE = pCompactLeAs Unpinned +createCompactMax = pCompactLeAs Unpinned --- | Pinned version of 'pCompactLE'. -{-# INLINE pPinnedCompactLE #-} -pPinnedCompactLE :: +RENAME(pCompactLE,createCompactMax) + +-- | Pinned version of 'createCompactMax'. +{-# INLINE createCompactMax' #-} +createCompactMax', pPinnedCompactLE :: forall m a. (MonadIO m, Unbox a) => Int -> Parser (MutArray a) m (MutArray a) -pPinnedCompactLE = pCompactLeAs Pinned +createCompactMax' = pCompactLeAs Pinned + +{-# DEPRECATED pPinnedCompactLE "Please use createCompactMax' instead." #-} +{-# INLINE pPinnedCompactLE #-} +pPinnedCompactLE = createCompactMax' data SpliceState s arr = SpliceInitial s @@ -3139,11 +3206,10 @@ data SpliceState s arr | SpliceYielding arr (SpliceState s arr) | SpliceFinish --- This mutates the first array (if it has space) to append values from the +-- | This mutates the first array (if it has space) to append values from the -- second one. This would work for immutable arrays as well because an -- immutable array never has additional space so a new array is allocated -- instead of mutating it. - {-# INLINE_NORMAL compactLeAs #-} compactLeAs :: forall m a. 
(MonadIO m, Unbox a) => PinnedState -> Int -> D.Stream m (MutArray a) -> D.Stream m (MutArray a) @@ -3239,23 +3305,29 @@ fCompactGeAs ps minElems = Fold step initial extract extract extract Nothing = return nil extract (Just buf) = return buf --- | Fold @fCompactGE minElems@ coalesces adjacent arrays in the input stream --- until the size becomes greater than or equal to @minElems@. +-- | Fold @createCompactMin minElems@ coalesces adjacent arrays in the +-- input stream until the size becomes greater than or equal to @minElems@. -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. -{-# INLINE fCompactGE #-} -fCompactGE :: +{-# INLINE createCompactMin #-} +createCompactMin, fCompactGE :: forall m a. (MonadIO m, Unbox a) => Int -> FL.Fold m (MutArray a) (MutArray a) -fCompactGE = fCompactGeAs Unpinned +createCompactMin = fCompactGeAs Unpinned --- | Pinned version of 'fCompactGE'. -{-# INLINE fPinnedCompactGE #-} -fPinnedCompactGE :: +RENAME(fCompactGE,createCompactMin) + +-- | Pinned version of 'createCompactMin'. +{-# INLINE createCompactMin' #-} +createCompactMin', fPinnedCompactGE :: forall m a. (MonadIO m, Unbox a) => Int -> FL.Fold m (MutArray a) (MutArray a) -fPinnedCompactGE = fCompactGeAs Pinned +createCompactMin' = fCompactGeAs Pinned + +{-# DEPRECATED fPinnedCompactGE "Please use createCompactMin' instead." #-} +{-# INLINE fPinnedCompactGE #-} +fPinnedCompactGE = createCompactMin' {-# INLINE_NORMAL lCompactGeAs #-} lCompactGeAs :: forall m a. (MonadIO m, Unbox a) @@ -3323,40 +3395,110 @@ lCompactGeAs ps minElems (Fold step1 initial1 _ final1) = -- | Like 'compactGE' but for transforming folds instead of stream. -- --- >>> lCompactGE n = Fold.many (MutArray.fCompactGE n) +-- >> lCompactGE n = Fold.many (MutArray.fCompactGE n) -- -- Generates unpinned arrays irrespective of the pinning status of input -- arrays. +{-# DEPRECATED lCompactGE "Please use scanCompactMin instead." 
#-} {-# INLINE lCompactGE #-} lCompactGE :: forall m a. (MonadIO m, Unbox a) => Int -> Fold m (MutArray a) () -> Fold m (MutArray a) () lCompactGE = lCompactGeAs Unpinned -- | Pinned version of 'lCompactGE'. +{-# DEPRECATED lPinnedCompactGE "Please use scanCompactMin' instead." #-} {-# INLINE lPinnedCompactGE #-} lPinnedCompactGE :: forall m a. (MonadIO m, Unbox a) => Int -> Fold m (MutArray a) () -> Fold m (MutArray a) () lPinnedCompactGE = lCompactGeAs Pinned --- | @compactGE n stream@ coalesces adjacent arrays in the @stream@ until --- the size becomes greater than or equal to @n@. +data CompactMinState arr = + CompactMinInit | CompactMinIncomplete arr | CompactMinComplete arr + +{-# INLINE_NORMAL scanCompactMinAs #-} +scanCompactMinAs :: forall m a. (MonadIO m, Unbox a) + => PinnedState -> Int -> Scanl m (MutArray a) (Maybe (MutArray a)) +scanCompactMinAs ps minElems = + Scanl step initial extract final + + where + + minBytes = minElems * SIZE_OF(a) + + functionName = "Streamly.Internal.Data.MutArray.scanCompactMin" + + initial = do + when (minElems <= 0) $ + -- XXX we can pass the module string from the higher level API + error $ functionName ++ ": the size of arrays [" + ++ show minElems ++ "] must be a natural number" + + return $ FL.Partial CompactMinInit + + {-# INLINE runInner #-} + runInner len buf = + if len >= minBytes + then do + return $ FL.Partial $ CompactMinComplete buf + else return $ FL.Partial $ CompactMinIncomplete buf + + step CompactMinInit arr = + runInner (byteLength arr) arr + + step (CompactMinComplete _) arr = + runInner (byteLength arr) arr + + -- XXX Buffer arrays as a list to avoid copy and reallocations + step (CompactMinIncomplete buf) arr = do + let len = byteLength buf + byteLength arr + buf1 <- if byteCapacity buf < len + then liftIO $ reallocExplicitAs + ps (SIZE_OF(a)) (max minBytes len) buf + else return buf + buf2 <- unsafeSplice buf1 arr + runInner len buf2 + + extract CompactMinInit = return Nothing + extract 
(CompactMinComplete arr) = return (Just arr) + extract (CompactMinIncomplete _) = return Nothing + + final CompactMinInit = return Nothing + final (CompactMinComplete arr) = return (Just arr) + final (CompactMinIncomplete arr) = return (Just arr) + +-- | Like 'compactMin' but a scan. +{-# INLINE scanCompactMin #-} +scanCompactMin :: forall m a. (MonadIO m, Unbox a) + => Int -> Scanl m (MutArray a) (Maybe (MutArray a)) +scanCompactMin = scanCompactMinAs Unpinned + +-- | Like 'compactMin'' but a scan. +{-# INLINE scanCompactMin' #-} +scanCompactMin' :: forall m a. (MonadIO m, Unbox a) + => Int -> Scanl m (MutArray a) (Maybe (MutArray a)) +scanCompactMin' = scanCompactMinAs Pinned + +-- | @compactMin n stream@ coalesces adjacent arrays in the @stream@ until +-- the compacted array size becomes greater than or equal to @n@. -- --- >>> compactGE n = Stream.foldMany (MutArray.fCompactGE n) +-- >>> compactMin n = Stream.foldMany (MutArray.createCompactMin n) -- -{-# INLINE compactGE #-} -compactGE :: +{-# INLINE compactMin #-} +compactMin, compactGE :: (MonadIO m, Unbox a) => Int -> Stream m (MutArray a) -> Stream m (MutArray a) -compactGE n = D.foldMany (fCompactGE n) +compactMin n = D.foldMany (createCompactMin n) + +RENAME(compactGE,compactMin) --- | 'compactEQ n' coalesces adajacent arrays in the input stream to +-- | 'compactExact n' coalesces adjacent arrays in the input stream to -- arrays of exact size @n@. 
-- -- /Unimplemented/ -{-# INLINE compactEQ #-} -compactEQ :: -- (MonadIO m, Unbox a) => +{-# INLINE compactExact #-} +compactExact :: -- (MonadIO m, Unbox a) => Int -> Stream m (MutArray a) -> Stream m (MutArray a) -compactEQ _n = undefined -- D.parseManyD (pCompactEQ n) +compactExact _n = undefined -- D.parseManyD (pCompactEQ n) ------------------------------------------------------------------------------- -- In-place mutation algorithms @@ -3435,23 +3577,23 @@ bubble cmp0 arr = -- Renaming -------------------------------------------------------------------------------- -RENAME(realloc, reallocBytes) -RENAME(castUnsafe, unsafeCast) -RENAME(newArrayWith, emptyWithAligned) -RENAME(getSliceUnsafe, unsafeGetSlice) -RENAME(putIndexUnsafe, unsafePutIndex) -RENAME(modifyIndexUnsafe, unsafeModifyIndex) -RENAME(getIndexUnsafe, unsafeGetIndex) -RENAME(snocUnsafe, unsafeSnoc) -RENAME(spliceUnsafe, unsafeSplice) -RENAME(pokeSkipUnsafe, unsafePokeSkip) -RENAME(peekSkipUnsafe, unsafePeekSkip) -RENAME(peekUncons, peek) -RENAME(peekUnconsUnsafe, unsafePeek) -RENAME(pokeAppend, poke) -RENAME(pokeAppendMay, pokeMay) +RENAME(realloc,reallocBytes) +RENAME(castUnsafe,unsafeCast) +RENAME(newArrayWith,emptyWithAligned) +RENAME(getSliceUnsafe,unsafeGetSlice) +RENAME(putIndexUnsafe,unsafePutIndex) +RENAME(modifyIndexUnsafe,unsafeModifyIndex) +RENAME(getIndexUnsafe,unsafeGetIndex) +RENAME(snocUnsafe,unsafeSnoc) +RENAME(spliceUnsafe,unsafeSplice) +RENAME(pokeSkipUnsafe,unsafePokeSkip) +RENAME(peekSkipUnsafe,unsafePeekSkip) +RENAME(peekUncons,peek) +RENAME(peekUnconsUnsafe,unsafePeek) +RENAME(pokeAppend,poke) +RENAME(pokeAppendMay,pokeMay) -- This renaming can be done directly without deprecations. But I'm keeping this -- intentionally. Packdiff should be able to point out such APIs that we can -- just remove. 
-RENAME(createOfWith, createWithOf) +RENAME(createOfWith,createWithOf) diff --git a/core/src/Streamly/Internal/Data/Parser.hs b/core/src/Streamly/Internal/Data/Parser.hs index 3e34fffff5..fae982235f 100644 --- a/core/src/Streamly/Internal/Data/Parser.hs +++ b/core/src/Streamly/Internal/Data/Parser.hs @@ -84,7 +84,7 @@ module Streamly.Internal.Data.Parser , dropWhile -- ** Separated by elements - -- | Separator could be in prefix postion ('takeStartBy'), or suffix + -- | Separator could be in prefix postion ('takeBeginBy'), or suffix -- position ('takeEndBy'). See 'deintercalate', 'sepBy' etc for infix -- separator parsing, also see 'intersperseQuotedBy' fold. @@ -94,8 +94,8 @@ module Streamly.Internal.Data.Parser , takeEndBy_ , takeEndByEsc -- , takeEndByEsc_ - , takeStartBy - , takeStartBy_ + , takeBeginBy + , takeBeginBy_ , takeEitherSepBy , wordBy @@ -115,7 +115,7 @@ module Streamly.Internal.Data.Parser -- Framed by separate start and end characters, potentially nested. -- blockWithQuotes allows quotes inside a block. However, - -- takeFramedByGeneric can be used to express takeStartBy, takeEndBy and + -- takeFramedByGeneric can be used to express takeBeginBy, takeEndBy and -- block with escaping. -- , takeFramedBy , takeFramedBy_ @@ -239,10 +239,13 @@ module Streamly.Internal.Data.Parser -- * Deprecated , next + , takeStartBy + , takeStartBy_ ) where #include "inline.hs" +#include "deprecation.h" #include "assert.hs" import Control.Monad (when) @@ -1042,7 +1045,7 @@ takeFramedByGeneric esc begin end (Fold fstep finitial _ ffinal) = if isEnd a then Done 0 <$> ffinal s else process s a n - Nothing -> -- takeStartBy case + Nothing -> -- takeBeginBy case case begin of Just isBegin -> if isBegin a @@ -1284,14 +1287,14 @@ takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond -- * Stops - when the predicate succeeds in non-leading position. -- * Fails - when the predicate fails in the leading position. 
-- --- >>> splitWithPrefix p f = Stream.parseMany (Parser.takeStartBy p f) +-- >>> splitWithPrefix p f = Stream.parseMany (Parser.takeBeginBy p f) -- -- Examples: - -- --- >>> p = Parser.takeStartBy (== ',') Fold.toList +-- >>> p = Parser.takeBeginBy (== ',') Fold.toList -- >>> leadingComma = Stream.parse p . Stream.fromList -- >>> leadingComma "a,b" --- Left (ParseError "takeStartBy: missing frame start") +-- Left (ParseError "takeBeginBy: missing frame start") -- ... -- >>> leadingComma ",," -- Right "," @@ -1302,9 +1305,10 @@ takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond -- -- /Pre-release/ -- -{-# INLINE takeStartBy #-} -takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeStartBy cond (Fold fstep finitial _ ffinal) = +{-# INLINE takeBeginBy #-} +takeBeginBy, takeStartBy :: Monad m => + (a -> Bool) -> Fold m a b -> Parser a m b +takeBeginBy cond (Fold fstep finitial _ ffinal) = Parser step initial extract @@ -1315,7 +1319,7 @@ takeStartBy cond (Fold fstep finitial _ ffinal) = return $ case res of FL.Partial s -> IPartial (Left' s) - FL.Done _ -> IError "takeStartBy: fold done without input" + FL.Done _ -> IError "takeBeginBy: fold done without input" {-# INLINE process #-} process s a = do @@ -1328,7 +1332,7 @@ takeStartBy cond (Fold fstep finitial _ ffinal) = step (Left' s) a = if cond a then process s a - else return $ Error "takeStartBy: missing frame start" + else return $ Error "takeBeginBy: missing frame start" step (Right' s) a = if not (cond a) then process s a @@ -1337,13 +1341,18 @@ takeStartBy cond (Fold fstep finitial _ ffinal) = extract (Left' s) = fmap (Done 0) $ ffinal s extract (Right' s) = fmap (Done 0) $ ffinal s --- | Like 'takeStartBy' but drops the separator. +RENAME(takeStartBy,takeBeginBy) + +-- | Like 'takeBeginBy' but drops the separator. 
-- --- >>> takeStartBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing +-- >>> takeBeginBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing -- -{-# INLINE takeStartBy_ #-} -takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeStartBy_ isBegin = takeFramedByGeneric Nothing (Just isBegin) Nothing +{-# INLINE takeBeginBy_ #-} +takeBeginBy_, takeStartBy_ :: Monad m => + (a -> Bool) -> Fold m a b -> Parser a m b +takeBeginBy_ isBegin = takeFramedByGeneric Nothing (Just isBegin) Nothing + +RENAME(takeStartBy_,takeBeginBy_) -- | @takeFramedByEsc_ isEsc isBegin isEnd fold@ parses a token framed using a -- begin and end predicate, and an escape character. The frame begin and end diff --git a/core/src/Streamly/Internal/Data/Stream.hs b/core/src/Streamly/Internal/Data/Stream.hs index 8bc7ec225d..812bb4d1c0 100644 --- a/core/src/Streamly/Internal/Data/Stream.hs +++ b/core/src/Streamly/Internal/Data/Stream.hs @@ -7,12 +7,11 @@ -- Portability : GHC -- -- Direct style re-implementation of CPS stream in --- "Streamly.Internal.Data.StreamK". The symbol or suffix 'D' in this --- module denotes the "Direct" style. GHC is able to INLINE and fuse direct +-- "Streamly.Internal.Data.StreamK". GHC is able to INLINE and fuse direct -- style better, providing better performance than CPS implementation. 
-- -- @ --- import qualified Streamly.Internal.Data.Stream as D +-- import qualified Streamly.Internal.Data.Stream as Stream -- @ module Streamly.Internal.Data.Stream diff --git a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs index d9df92e2fd..7d47588ccb 100644 --- a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs +++ b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs @@ -776,7 +776,7 @@ isInfixOf :: (MonadIO m, Eq a, Enum a, Unbox a) isInfixOf infx stream = do arr <- fold Array.create infx -- XXX can use breakOnSeq instead (when available) - r <- null $ StreamD.drop 1 $ Nesting.splitOnSeq arr Fold.drain stream + r <- null $ StreamD.drop 1 $ Nesting.splitSepBySeq_ arr Fold.drain stream return (not r) -- Note: isPrefixOf uses the prefix stream only once. In contrast, isSuffixOf diff --git a/core/src/Streamly/Internal/Data/Stream/Nesting.hs b/core/src/Streamly/Internal/Data/Stream/Nesting.hs index a692b4a0df..5516d09106 100644 --- a/core/src/Streamly/Internal/Data/Stream/Nesting.hs +++ b/core/src/Streamly/Internal/Data/Stream/Nesting.hs @@ -17,9 +17,6 @@ -- -- These combinators involve transformation, generation, elimination so can be -- classified under any of those. --- --- Ultimately these operations should be supported by Unfolds, Pipes and Folds, --- and this module may become redundant. -- The zipWithM combinator in this module has been adapted from the vector -- package (c) Roman Leshchinskiy. @@ -32,27 +29,32 @@ module Streamly.Internal.Data.Stream.Nesting -- ** Combine Two Streams -- | Functions ending in the shape: -- - -- @t m a -> t m a -> t m a@. + -- @Stream m a -> Stream m a -> Stream m a@. -- *** Appending -- | Append a stream after another. A special case of concatMap or - -- unfoldMany. + -- unfoldEach. Note, appending more than two streams is called @concat@ + -- which could be called appendMany or appendAll in append terminology and + -- is equivalent to @concatMap id@. 
Append is equivalent to @mergeBy fst@. AppendState(..) , append -- *** Interleaving -- | Interleave elements from two streams alternately. A special case of - -- unfoldInterleave. + -- unfoldEachInterleave. Interleave is equivalent to mergeBy with a round + -- robin merge function. , InterleaveState(..) , interleave - , interleaveMin - , interleaveFst - , interleaveFstSuffix + , interleaveEndBy' + , interleaveSepBy' + , interleaveBeginBy + , interleaveEndBy + , interleaveSepBy -- *** Scheduling -- | Execute streams alternately irrespective of whether they generate -- elements or not. Note 'interleave' would execute a stream until it - -- yields an element. A special case of unfoldRoundRobin. + -- yields an element. A special case of unfoldEachRoundRobin. , roundRobin -- interleaveFair?/ParallelFair -- *** Merging @@ -66,34 +68,39 @@ module Streamly.Internal.Data.Stream.Nesting -- | Functions generally ending in these shapes: -- -- @ - -- concat: f (t m a) -> t m a - -- concatMap: (a -> t m b) -> t m a -> t m b - -- unfoldMany: Unfold m a b -> t m a -> t m b + -- concat: f (Stream m a) -> Stream m a + -- concatMap: (a -> Stream m b) -> Stream m a -> Stream m b + -- unfoldEach: Unfold m a b -> Stream m a -> Stream m b -- @ - -- *** ConcatUnfold + -- *** unfoldEach -- | Generate streams by using an unfold on each element of an input -- stream, append the resulting streams and flatten. A special case of - -- gintercalate. + -- intercalate. + , unfoldEachFoldBy , ConcatUnfoldInterleaveState (..) - , unfoldInterleave - , unfoldRoundRobin - - -- *** Interpose - -- | Like unfoldMany but intersperses an effect between the streams. A - -- special case of gintercalate. - , interpose - , interposeM - , interposeSuffix - , interposeSuffixM - - -- *** Intercalate - -- | Like unfoldMany but intersperses streams from another source between - -- the streams from the first source. 
- , gintercalate - , gintercalateSuffix - , intercalate - , intercalateSuffix + , unfoldEachInterleave + , unfoldEachInterleaveRev + , unfoldEachRoundRobin + + -- *** unfoldEach joined by elements + -- | Like unfoldEach but intersperses an element between the streams after + -- unfolding. A special case of intercalate. + , unfoldEachSepBy + , unfoldEachSepByM + , unfoldEachEndBy + , unfoldEachEndByM + + -- *** unfoldEach joined by sequences + -- | Like unfoldEach but intersperses a sequence between the unfolded + -- streams before unfolding. A special case of intercalate. + , unfoldEachSepBySeq + , unfoldEachEndBySeq + + -- *** unfoldEach joined by streams + -- | Like unfoldEach but intersperses streams between the unfolded streams. + , intercalateSepBy + , intercalateEndBy -- * Eliminate -- | Folding and Parsing chunks of streams to eliminate nested streams. @@ -111,20 +118,17 @@ module Streamly.Internal.Data.Stream.Nesting -- ** Parsing -- | Parsing is opposite to flattening. 'parseMany' is dual to concatMap or - -- unfoldMany. concatMap generates a stream from single values in a + -- unfoldEach. concatMap generates a stream from single values in a -- stream and flattens, parseMany does the opposite of flattening by -- splitting the stream and then folds each such split to single value in -- the output stream. , parseMany - , parseManyD , parseSequence , parseManyTill , parseIterate - , parseIterateD -- ** Grouping -- | Group segments of a stream and fold. Special case of parsing. - , groupsBy , groupsWhile , groupsRollingBy @@ -133,15 +137,14 @@ module Streamly.Internal.Data.Stream.Nesting , takeEndBySeq , takeEndBySeq_ , wordsBy - , splitOnSeq - , splitOnSuffixSeq + , splitSepBySeq_ , splitEndBySeq , splitEndBySeq_ + , splitOnSuffixSeq -- internal - -- XXX Implement these as folds or parsers instead. 
- , splitOnSuffixSeqAny - , splitOnPrefix - , splitOnAny + , splitBeginBy_ + , splitEndBySeqOneOf + , splitSepBySeqOneOf -- * Transform (Nested Containers) -- | Opposite to compact in ArrayStream @@ -152,9 +155,29 @@ module Streamly.Internal.Data.Stream.Nesting , dropPrefix , dropInfix , dropSuffix + + -- * Deprecated + , interpose + , interposeM + , interposeSuffix + , interposeSuffixM + , gintercalate + , gintercalateSuffix + , intercalate + , intercalateSuffix + , unfoldInterleave + , unfoldRoundRobin + , interleaveMin + , interleaveFst + , interleaveFstSuffix + , parseManyD + , parseIterateD + , groupsBy + , splitOnSeq ) where +#include "deprecation.h" #include "inline.hs" #include "ArrayMacros.h" @@ -181,9 +204,11 @@ import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.Parser as PRD import qualified Streamly.Internal.Data.Ring as RB +import qualified Streamly.Internal.Data.Stream.Generate as Stream +import qualified Streamly.Internal.Data.Unfold.Type as Unfold import Streamly.Internal.Data.Stream.Transform - (intersperse, intersperseMSuffix) + (intersperse, intersperseEndByM) import Streamly.Internal.Data.Stream.Type import Prelude hiding (concatMap, mapM, zipWith) @@ -196,14 +221,14 @@ import Prelude hiding (concatMap, mapM, zipWith) data AppendState s1 s2 = AppendFirst s1 | AppendSecond s2 --- From an implementation perspective, StreamK.'Streamly.Data.StreamK.append' --- translates into a function call whereas Stream.'append' translates into a --- conditional branch (jump). However, the overhead of the function call in --- StreamK.append is incurred only once, while the overhead of the conditional --- branch in fused append is incurred for each element in the stream. As a --- result, StreamK.append has a linear time complexity of O(n), while fused --- append has a quadratic time complexity of O(n^2), where @n@ represents the --- number of 'append's used. 
+-- Performance Note: From an implementation perspective, +-- StreamK.'Streamly.Data.StreamK.append' translates into a function call +-- whereas Stream.'append' translates into a conditional branch (jump). +-- However, the overhead of the function call in StreamK.append is incurred +-- only once, while the overhead of the conditional branch in fused append is +-- incurred for each element in the stream. As a result, StreamK.append has a +-- linear time complexity of O(n), while fused append has a quadratic time +-- complexity of O(n^2), where @n@ represents the number of 'append's used. -- | WARNING! O(n^2) time complexity wrt number of streams. Suitable for -- statically fusing a small number of streams. Use the O(n) complexity @@ -246,13 +271,35 @@ append (Stream step1 state1) (Stream step2 state2) = data InterleaveState s1 s2 = InterleaveFirst s1 s2 | InterleaveSecond s1 s2 | InterleaveSecondOnly s2 | InterleaveFirstOnly s1 +-- XXX Ideally we should change the order of the arguments but we have the same +-- convention in append as well, we will have to change that too. Also, the +-- argument order of append makes sense for infix use. + -- | WARNING! O(n^2) time complexity wrt number of streams. Suitable for -- statically fusing a small number of streams. Use the O(n) complexity -- StreamK.'Streamly.Data.StreamK.interleave' otherwise. -- --- Interleaves two streams, yielding one element from each stream alternately. --- When one stream stops the rest of the other stream is used in the output --- stream. +-- Interleaves two streams, yielding one element from each stream alternately, +-- starting from the first stream. When one stream is exhausted, all the +-- remaining elements of the other stream are emitted in the output stream. +-- +-- Both the streams are completely exhausted. +-- +-- @ +-- (a b c) (. . .) => a . b . c . +-- (a b c) (. . ) => a . b . c +-- (a b ) (. . .) => a . b . . 
+-- @ +-- +-- Examples: +-- +-- >>> f x y = Stream.toList $ Stream.interleave (Stream.fromList x) (Stream.fromList y) +-- >>> f "abc" "..." +-- "a.b.c." +-- >>> f "abc" ".." +-- "a.b.c" +-- >>> f "ab" "..." +-- "a.b.." -- {-# INLINE_NORMAL interleave #-} interleave :: Monad m => Stream m a -> Stream m a -> Stream m a @@ -290,9 +337,49 @@ interleave (Stream step1 state1) (Stream step2 state2) = Skip s -> Skip (InterleaveSecondOnly s) Stop -> Stop +-- XXX Check the performance of the implementation, we can write a custom one. + +{-# ANN module "HLint: ignore Use zip" #-} + +-- | Interleave the two streams such that the elements of the second stream are +-- ended by the elements of the first stream. If one of the streams is +-- exhausted then interleaving stops. +-- +-- @ +-- (. . .) (a b c) => a . b . c . +-- (. . ) (a b c) => a . b . -- c is discarded +-- (. . .) (a b ) => a . b . -- . is discarded +-- @ +-- +-- Examples: +-- +-- >>> f x y = Stream.toList $ Stream.interleaveEndBy' (Stream.fromList x) (Stream.fromList y) +-- >>> f "..." "abc" +-- "a.b.c." +-- >>> f ".." "abc" +-- "a.b." +-- >>> f "..." "ab" +-- "a.b." +-- +-- Definition: +-- +-- >>> interleaveEndBy' s1 s2 = unfoldEach Unfold.fromTuple $ Stream.zipWith (,) s2 s1 +-- +-- Similarly, we can define interleaveBeginBy' as: +-- +-- >>> interleaveBeginBy' = flip interleaveEndBy' +-- +{-# INLINE_NORMAL interleaveEndBy' #-} +interleaveEndBy' :: Monad m => Stream m a -> Stream m a -> Stream m a +interleaveEndBy' s1 s2 = unfoldEach Unfold.fromTuple $ zipWith (,) s2 s1 + -- | Like `interleave` but stops interleaving as soon as any of the two streams --- stops. +-- stops. The suffix 'Min' in the name determines the stop behavior. -- +-- This is the same as interleaveEndBy' but it might emit an additional element +-- at the end. +-- +{-# DEPRECATED interleaveMin "Please use flip interleaveEndBy' instead." 
#-} {-# INLINE_NORMAL interleaveMin #-} interleaveMin :: Monad m => Stream m a -> Stream m a -> Stream m a interleaveMin (Stream step1 state1) (Stream step2 state2) = @@ -318,28 +405,74 @@ interleaveMin (Stream step1 state1) (Stream step2 state2) = step _ (InterleaveFirstOnly _) = undefined step _ (InterleaveSecondOnly _) = undefined --- | Interleaves the outputs of two streams, yielding elements from each stream --- alternately, starting from the first stream. As soon as the first stream --- finishes, the output stops, discarding the remaining part of the second --- stream. In this case, the last element in the resulting stream would be from --- the second stream. If the second stream finishes early then the first stream --- still continues to yield elements until it finishes. +-- | Interleave the two streams such that the elements of the first stream are +-- infixed between the elements of the second stream. If one of the streams is +-- exhausted then interleaving stops. -- --- >>> :set -XOverloadedStrings --- >>> import Data.Functor.Identity (Identity) --- >>> Stream.interleaveFstSuffix "abc" ",,,," :: Stream Identity Char --- fromList "a,b,c," --- >>> Stream.interleaveFstSuffix "abc" "," :: Stream Identity Char --- fromList "a,bc" +-- @ +-- (. . .) (a b c) => a . b . c -- additional . is discarded +-- (. . ) (a b c) => a . b . c +-- (. ) (a b c) => a . b -- c is discarded +-- @ -- --- 'interleaveFstSuffix' is a dual of 'interleaveFst'. +-- >>> f x y = Stream.toList $ Stream.interleaveSepBy' (Stream.fromList x) (Stream.fromList y) +-- >>> f "..." "abc" +-- "a.b.c" +-- >>> f ".." "abc" +-- "a.b.c" +-- >>> f "." "abc" +-- "a.b" +-- +{-# INLINE_NORMAL interleaveSepBy' #-} +interleaveSepBy' :: Monad m => Stream m a -> Stream m a -> Stream m a +-- XXX Not an efficient implementation, need to write a fused one. 
+interleaveSepBy' s1 s2 = concatEffect $ do + r <- uncons s2 + case r of + Nothing -> return Stream.nil + Just (h, t) -> + return $ h `Stream.cons` + unfoldEach Unfold.fromTuple (zipWith (,) s1 t) + +-- | Interleave the two streams such that the elements of the second stream are +-- prefixed by the elements of the first stream. Interleaving stops when and +-- only when the second stream is exhausted. Shortfall of the prefix stream is +-- ignored and excess is discarded. -- --- Do not use dynamically. +-- @ +-- (. . .) (a b c) => . a . b . c +-- (. . .) (a b ) => . a . b -- additional . is discarded +-- (. . ) (a b c) => . a . b c -- missing . is ignored +-- @ -- --- /Pre-release/ -{-# INLINE_NORMAL interleaveFstSuffix #-} -interleaveFstSuffix :: Monad m => Stream m a -> Stream m a -> Stream m a -interleaveFstSuffix (Stream step1 state1) (Stream step2 state2) = +-- /Unimplemented/ +-- +{-# INLINE_NORMAL interleaveBeginBy #-} +interleaveBeginBy :: -- Monad m => + Stream m a -> Stream m a -> Stream m a +interleaveBeginBy = undefined + +-- | Like 'interleaveEndBy'' but interleaving stops when and only when the +-- second stream is exhausted. Shortfall of the suffix stream is ignored and +-- excess is discarded. +-- +-- @ +-- (. . .) (a b c) => a . b . c . +-- (. . ) (a b c) => a . b . c -- missing . is ignored +-- (. . .) (a b ) => a . b . -- additional . is discarded +-- @ +-- +-- >>> f x y = Stream.toList $ Stream.interleaveEndBy (Stream.fromList x) (Stream.fromList y) +-- >>> f "..." "abc" +-- "a.b.c." +-- >>> f ".." "abc" +-- "a.b.c" +-- >>> f "..." "ab" +-- "a.b." 
+-- +{-# INLINE_NORMAL interleaveEndBy #-} +interleaveEndBy :: Monad m => Stream m a -> Stream m a -> Stream m a +interleaveEndBy (Stream step2 state2) (Stream step1 state1) = Stream step (InterleaveFirst state1 state2) where @@ -368,6 +501,11 @@ interleaveFstSuffix (Stream step1 state1) (Stream step2 state2) = step _ (InterleaveSecondOnly _) = undefined +{-# INLINE interleaveFstSuffix #-} +{-# DEPRECATED interleaveFstSuffix "Please use flip interleaveEndBy instead." #-} +interleaveFstSuffix :: Monad m => Stream m a -> Stream m a -> Stream m a +interleaveFstSuffix = flip interleaveEndBy + data InterleaveInfixState s1 s2 a = InterleaveInfixFirst s1 s2 | InterleaveInfixSecondBuf s1 s2 @@ -375,28 +513,29 @@ data InterleaveInfixState s1 s2 a | InterleaveInfixFirstYield s1 s2 a | InterleaveInfixFirstOnly s1 --- | Interleaves the outputs of two streams, yielding elements from each stream --- alternately, starting from the first stream and ending at the first stream. --- If the second stream is longer than the first, elements from the second --- stream are infixed with elements from the first stream. If the first stream --- is longer then it continues yielding elements even after the second stream --- has finished. +-- | Like 'interleaveSepBy'' but interleaving stops when and only when the +-- second stream is exhausted. Shortfall of the infix stream is ignored and +-- excess is discarded. -- --- >>> :set -XOverloadedStrings --- >>> import Data.Functor.Identity (Identity) --- >>> Stream.interleaveFst "abc" ",,,," :: Stream Identity Char --- fromList "a,b,c" --- >>> Stream.interleaveFst "abc" "," :: Stream Identity Char --- fromList "a,bc" +-- @ +-- (. . .) (a b c) => a . b . c -- additional . is discarded +-- (. . ) (a b c) => a . b . c +-- (. ) (a b c) => a . b c -- missing . is ignored +-- @ -- --- 'interleaveFst' is a dual of 'interleaveFstSuffix'. +-- Examples: -- --- Do not use dynamically. 
+-- >>> f x y = Stream.toList $ Stream.interleaveSepBy (Stream.fromList x) (Stream.fromList y) +-- >>> f "..." "abc" +-- "a.b.c" +-- >>> f ".." "abc" +-- "a.b.c" +-- >>> f "." "abc" +-- "a.bc" -- --- /Pre-release/ -{-# INLINE_NORMAL interleaveFst #-} -interleaveFst :: Monad m => Stream m a -> Stream m a -> Stream m a -interleaveFst (Stream step1 state1) (Stream step2 state2) = +{-# INLINE_NORMAL interleaveSepBy #-} +interleaveSepBy :: Monad m => Stream m a -> Stream m a -> Stream m a +interleaveSepBy (Stream step2 state2) (Stream step1 state1) = Stream step (InterleaveInfixFirst state1 state2) where @@ -433,6 +572,11 @@ interleaveFst (Stream step1 state1) (Stream step2 state2) = Skip s -> Skip (InterleaveInfixFirstOnly s) Stop -> Stop +{-# DEPRECATED interleaveFst "Please use flip interleaveSepBy instead." #-} +{-# INLINE_NORMAL interleaveFst #-} +interleaveFst :: Monad m => Stream m a -> Stream m a -> Stream m a +interleaveFst = flip interleaveSepBy + ------------------------------------------------------------------------------ -- Scheduling ------------------------------------------------------------------------------ @@ -599,9 +743,23 @@ mergeFstBy _f _m1 _m2 = undefined -- fromStreamK $ D.mergeFstBy f (toStreamD m1) (toStreamD m2) ------------------------------------------------------------------------------ --- Combine N Streams - unfoldMany +-- Combine N Streams - unfoldEach ------------------------------------------------------------------------------ +-- XXX If we want to have strictly N elements in each batch then we can supply a +-- Maybe input to the fold. That could be another variant of this combinator. + +-- | Stream must be finite. Unfolds each element of the input stream to +-- generate streams. After generating one element from each stream fold those +-- using the supplied fold and emit the result in the output stream. Continue +-- doing this until the streams are exhausted. 
+-- +-- /Unimplemented/ +{-# INLINE_NORMAL unfoldEachFoldBy #-} +unfoldEachFoldBy :: -- Monad m => + Fold m b c -> Unfold m a b -> Stream m a -> Stream m c +unfoldEachFoldBy = undefined + data ConcatUnfoldInterleaveState o i = ConcatUnfoldInterleaveOuter o [i] | ConcatUnfoldInterleaveInner o [i] @@ -624,30 +782,83 @@ data ConcatUnfoldInterleaveState o i = -- -- Ideally, we need some scheduling bias to inner streams vs outer stream. -- Maybe we can configure the behavior. + +-- XXX Instead of using "reverse" build the list in the correct order to begin +-- with. + +-- | Like 'unfoldEach' but interleaves the resulting streams instead of +-- appending them. Unfolds each element in the input stream to a stream and +-- then interleave the resulting streams. +-- +-- >>> lists = Stream.fromList [[1,4,7],[2,5,8],[3,6,9]] +-- >>> Stream.toList $ Stream.unfoldEachInterleave Unfold.fromList lists +-- [1,2,3,4,5,6,7,8,9] +-- +-- This is similar to 'mergeMapWith' using 'Streamly.Data.StreamK.interleave' +-- but an order of magnitude more efficient due to fusion. +-- +-- See also 'mergeMapWith'. 
+-- +{-# INLINE_NORMAL unfoldEachInterleave #-} +unfoldEachInterleave :: Monad m => + Unfold m a b -> Stream m a -> Stream m b +unfoldEachInterleave (Unfold istep inject) (Stream ostep ost) = + Stream step (ConcatUnfoldInterleaveOuter ost []) + + where + + {-# INLINE_LATE step #-} + step gst (ConcatUnfoldInterleaveOuter o ls) = do + r <- ostep (adaptState gst) o + case r of + Yield a o' -> do + i <- inject a + i `seq` return (Skip (ConcatUnfoldInterleaveInner o' (i : ls))) + Skip o' -> return $ Skip (ConcatUnfoldInterleaveOuter o' ls) + Stop -> return $ Skip (ConcatUnfoldInterleaveInnerL (reverse ls) []) + + step _ (ConcatUnfoldInterleaveInner _ []) = undefined + step _ (ConcatUnfoldInterleaveInner o (st:ls)) = do + r <- istep st + return $ case r of + Yield x s -> Yield x (ConcatUnfoldInterleaveOuter o (s:ls)) + Skip s -> Skip (ConcatUnfoldInterleaveInner o (s:ls)) + Stop -> Skip (ConcatUnfoldInterleaveOuter o ls) + + step _ (ConcatUnfoldInterleaveInnerL [] []) = return Stop + step _ (ConcatUnfoldInterleaveInnerL [] rs) = + return $ Skip (ConcatUnfoldInterleaveInnerR [] (reverse rs)) + + step _ (ConcatUnfoldInterleaveInnerL (st:ls) rs) = do + r <- istep st + return $ case r of + Yield x s -> Yield x (ConcatUnfoldInterleaveInnerL ls (s:rs)) + Skip s -> Skip (ConcatUnfoldInterleaveInnerL (s:ls) rs) + Stop -> Skip (ConcatUnfoldInterleaveInnerL ls rs) + + step _ (ConcatUnfoldInterleaveInnerR [] []) = return Stop + step _ (ConcatUnfoldInterleaveInnerR ls []) = + return $ Skip (ConcatUnfoldInterleaveInnerL (reverse ls) []) + + step _ (ConcatUnfoldInterleaveInnerR ls (st:rs)) = do + r <- istep st + return $ case r of + Yield x s -> Yield x (ConcatUnfoldInterleaveInnerR (s:ls) rs) + Skip s -> Skip (ConcatUnfoldInterleaveInnerR ls (s:rs)) + Stop -> Skip (ConcatUnfoldInterleaveInnerR ls rs) + +-- | Like 'unfoldEachInterleave' but reverses the traversal direction after +-- reaching the last stream. 
This could be a little bit more efficient if the +-- order of traversal is not important. -- --- XXX Instead of using "concatPairsWith wSerial" we can implement an N-way --- interleaving CPS combinator which behaves like unfoldInterleave. Instead --- of pairing up the streams we just need to go yielding one element from each --- stream and storing the remaining streams and then keep doing rounds through --- those in a round robin fashion. This would be much like wAsync. - --- | This does not pair streams like mergeMapWith, instead, it goes through --- each stream one by one and yields one element from each stream. After it --- goes to the last stream it reverses the traversal to come back to the first --- stream yielding elements from each stream on its way back to the first --- stream and so on. --- --- >>> lists = Stream.fromList [[1,1],[2,2],[3,3],[4,4],[5,5]] --- >>> interleaved = Stream.unfoldInterleave Unfold.fromList lists --- >>> Stream.fold Fold.toList interleaved --- [1,2,3,4,5,5,4,3,2,1] --- --- Note that this is order of magnitude more efficient than "mergeMapWith --- interleave" because of fusion. 
--- -{-# INLINE_NORMAL unfoldInterleave #-} -unfoldInterleave :: Monad m => Unfold m a b -> Stream m a -> Stream m b -unfoldInterleave (Unfold istep inject) (Stream ostep ost) = +-- >>> lists = Stream.fromList [[1,4,7],[2,5,8],[3,6,9]] +-- >>> Stream.toList $ Stream.unfoldEachInterleaveRev Unfold.fromList lists +-- [1,2,3,6,5,4,7,8,9] +-- +{-# INLINE_NORMAL unfoldEachInterleaveRev #-} +unfoldEachInterleaveRev, unfoldInterleave :: Monad m => + Unfold m a b -> Stream m a -> Stream m b +unfoldEachInterleaveRev (Unfold istep inject) (Stream ostep ost) = Stream step (ConcatUnfoldInterleaveOuter ost []) where @@ -692,23 +903,26 @@ unfoldInterleave (Unfold istep inject) (Stream ostep ost) = Skip s -> Skip (ConcatUnfoldInterleaveInnerR ls (s:rs)) Stop -> Skip (ConcatUnfoldInterleaveInnerR ls rs) +RENAME(unfoldInterleave,unfoldEachInterleaveRev) + -- XXX In general we can use different scheduling strategies e.g. how to -- schedule the outer vs inner loop or assigning weights to different streams -- or outer and inner loops. -- -- This could be inefficient if the tasks are too small. -- --- Compared to unfoldInterleave this one switches streams on Skips. +-- Compared to unfoldEachInterleave this one switches streams on Skips. --- | 'unfoldInterleave' switches to the next stream whenever a value from a +-- | 'unfoldEachInterleave' switches to the next stream whenever a value from a -- stream is yielded, it does not switch on a 'Skip'. So if a stream keeps -- skipping for long time other streams won't get a chance to run. --- 'unfoldRoundRobin' switches on Skip as well. So it basically schedules each +-- 'unfoldEachRoundRobin' switches on Skip as well. So it basically schedules each -- stream fairly irrespective of whether it produces a value or not. 
-- -{-# INLINE_NORMAL unfoldRoundRobin #-} -unfoldRoundRobin :: Monad m => Unfold m a b -> Stream m a -> Stream m b -unfoldRoundRobin (Unfold istep inject) (Stream ostep ost) = +{-# INLINE_NORMAL unfoldEachRoundRobin #-} +unfoldEachRoundRobin, unfoldRoundRobin :: Monad m => + Unfold m a b -> Stream m a -> Stream m b +unfoldEachRoundRobin (Unfold istep inject) (Stream ostep ost) = Stream step (ConcatUnfoldInterleaveOuter ost []) where {-# INLINE_LATE step #-} @@ -753,6 +967,8 @@ unfoldRoundRobin (Unfold istep inject) (Stream ostep ost) = Skip s -> Skip (ConcatUnfoldInterleaveInnerR (s:ls) rs) Stop -> Skip (ConcatUnfoldInterleaveInnerR ls rs) +RENAME(unfoldRoundRobin,unfoldEachRoundRobin) + ------------------------------------------------------------------------------ -- Combine N Streams - interpose ------------------------------------------------------------------------------ @@ -764,16 +980,22 @@ data InterposeSuffixState s1 i1 = | InterposeSuffixFirstInner s1 i1 | InterposeSuffixSecond s1 --- Note that if an unfolded layer turns out to be nil we still emit the +-- XXX Note that if an unfolded layer turns out to be nil we still emit the -- separator effect. An alternate behavior could be to emit the separator -- effect only if at least one element has been yielded by the unfolding. -- However, that becomes a bit complicated, so we have chosen the former --- behvaior for now. -{-# INLINE_NORMAL interposeSuffixM #-} -interposeSuffixM - :: Monad m - => m c -> Unfold m b c -> Stream m b -> Stream m c -interposeSuffixM +-- behavior for now. + +-- | Monadic variant of 'unfoldEachEndBy'. 
+-- +-- Definition: +-- +-- >>> unfoldEachEndByM x = Stream.intercalateEndBy Unfold.identity (Stream.repeatM x) +-- +{-# INLINE_NORMAL unfoldEachEndByM #-} +unfoldEachEndByM, interposeSuffixM :: Monad m => + m c -> Unfold m b c -> Stream m b -> Stream m c +unfoldEachEndByM action (Unfold istep1 inject1) (Stream step1 state1) = Stream step (InterposeSuffixFirst state1) @@ -811,18 +1033,25 @@ interposeSuffixM r <- action return $ Yield r (InterposeSuffixFirst s1) --- interposeSuffix x unf str = gintercalateSuffix unf str UF.identity (repeat x) - -- | Unfold the elements of a stream, append the given element after each -- unfolded stream and then concat them into a single stream. -- --- >>> unlines = Stream.interposeSuffix '\n' +-- Definition: +-- +-- >>> unfoldEachEndBy x = Stream.intercalateEndBy Unfold.identity (Stream.repeat x) +-- +-- Usage: +-- +-- >>> unlines = Stream.unfoldEachEndBy '\n' -- -- /Pre-release/ -{-# INLINE interposeSuffix #-} -interposeSuffix :: Monad m +{-# INLINE unfoldEachEndBy #-} +unfoldEachEndBy, interposeSuffix :: Monad m => c -> Unfold m b c -> Stream m b -> Stream m c -interposeSuffix x = interposeSuffixM (return x) +unfoldEachEndBy x = unfoldEachEndByM (return x) + +RENAME(interposeSuffix,unfoldEachEndBy) +RENAME(interposeSuffixM,unfoldEachEndByM) {-# ANN type InterposeState Fuse #-} data InterposeState s1 i1 a = @@ -837,9 +1066,17 @@ data InterposeState s1 i1 a = -- Note that this only interposes the pure values, we may run many effects to -- generate those values as some effects may not generate anything (Skip). -{-# INLINE_NORMAL interposeM #-} -interposeM :: Monad m => m c -> Unfold m b c -> Stream m b -> Stream m c -interposeM + +-- | Monadic variant of 'unfoldEachSepBy'. 
+-- +-- Definition: +-- +-- >>> unfoldEachSepByM x = Stream.intercalateSepBy Unfold.identity (Stream.repeatM x) +-- +{-# INLINE_NORMAL unfoldEachSepByM #-} +unfoldEachSepByM, interposeM :: Monad m => + m c -> Unfold m b c -> Stream m b -> Stream m c +unfoldEachSepByM action (Unfold istep1 inject1) (Stream step1 state1) = Stream step (InterposeFirst state1) @@ -906,18 +1143,26 @@ interposeM return $ Yield v (InterposeFirstInner s1 i1) -} --- > interpose x unf str = gintercalate unf str UF.identity (repeat x) - -- | Unfold the elements of a stream, intersperse the given element between the -- unfolded streams and then concat them into a single stream. -- --- >>> unwords = Stream.interpose ' ' +-- Definition: +-- +-- >>> unfoldEachSepBy x = unfoldEachSepByM (return x) +-- >>> unfoldEachSepBy x = Stream.intercalateSepBy Unfold.identity (Stream.repeat x) +-- +-- Usage: +-- +-- >>> unwords = Stream.unfoldEachSepBy ' ' -- -- /Pre-release/ -{-# INLINE interpose #-} -interpose :: Monad m +{-# INLINE unfoldEachSepBy #-} +unfoldEachSepBy, interpose :: Monad m => c -> Unfold m b c -> Stream m b -> Stream m c -interpose x = interposeM (return x) +unfoldEachSepBy x = unfoldEachSepByM (return x) + +RENAME(interposeM,unfoldEachSepByM) +RENAME(interpose,unfoldEachSepBy) ------------------------------------------------------------------------------ -- Combine N Streams - intercalate @@ -933,16 +1178,22 @@ data ICUState s1 s2 i1 i2 = | ICUFirstOnlyInner s1 i1 | ICUSecondOnlyInner s2 i2 --- | 'interleaveFstSuffix' followed by unfold and concat. +-- | See 'intercalateSepBy' for detailed documentation. +-- +-- You can think of this as 'interleaveEndBy' on the stream of streams followed +-- by concat. 
Same as the following but more efficient: +-- +-- >>> intercalateEndBy u1 s1 u2 s2 = Stream.concat $ interleaveEndBy (fmap (unfold u1) s1) (fmap (unfold u2) s2) -- -- /Pre-release/ -{-# INLINE_NORMAL gintercalateSuffix #-} -gintercalateSuffix - :: Monad m - => Unfold m a c -> Stream m a -> Unfold m b c -> Stream m b -> Stream m c -gintercalateSuffix - (Unfold istep1 inject1) (Stream step1 state1) - (Unfold istep2 inject2) (Stream step2 state2) = +{-# INLINE_NORMAL intercalateEndBy #-} +intercalateEndBy :: Monad m => + Unfold m a c -> Stream m a + -> Unfold m b c -> Stream m b + -> Stream m c +intercalateEndBy + (Unfold istep2 inject2) (Stream step2 state2) + (Unfold istep1 inject1) (Stream step1 state1) = Stream step (ICUFirst state1 state2) where @@ -999,6 +1250,17 @@ gintercalateSuffix step _ (ICUSecondOnly _s2) = undefined step _ (ICUSecondOnlyInner _s2 _i2) = undefined +-- | +-- +-- >>> gintercalateSuffix u1 s1 u2 s2 = Stream.intercalateEndBy u2 s2 u1 s1 +-- +{-# DEPRECATED gintercalateSuffix "Please use intercalateEndBy instead. Note the change in argument order." #-} +{-# INLINE gintercalateSuffix #-} +gintercalateSuffix + :: Monad m + => Unfold m a c -> Stream m a -> Unfold m b c -> Stream m b -> Stream m c +gintercalateSuffix u1 s1 u2 s2 = intercalateEndBy u2 s2 u1 s1 + data ICALState s1 s2 i1 i2 a = ICALFirst s1 s2 -- | ICALFirstYield s1 s2 i1 @@ -1012,24 +1274,36 @@ data ICALState s1 s2 i1 i2 a = -- -- | ICALSecondInner s1 s2 i1 i2 a -- -- | ICALFirstResume s1 s2 i1 i2 a --- XXX we can swap the order of arguments to gintercalate so that the --- definition of unfoldMany becomes simpler? The first stream should be --- infixed inside the second one. However, if we change the order in --- "interleave" as well similarly, then that will make it a bit unintuitive. +-- | The first stream @Stream m b@ is turned into a stream of streams by +-- unfolding each element using the first unfold, similarly @Stream m a@ is +-- also turned into a stream of streams. 
The second stream of streams is +-- interspersed with the streams from the first stream in an infix manner and +-- then the resulting stream is flattened. -- --- > unfoldMany unf str = --- > gintercalate unf str (UF.nilM (\_ -> return ())) (repeat ()) - --- | 'interleaveFst' followed by unfold and concat. +-- You can think of this as 'interleaveSepBy' on the stream of streams followed +-- by concat. Same as the following but more efficient: +-- +-- >>> intercalateSepBy u1 s1 u2 s2 = Stream.concat $ interleaveSepBy (fmap (unfold u1) s1) (fmap (unfold u2) s2) +-- +-- If the separator stream consists of nil streams then it becomes equivalent +-- to 'unfoldEach': +-- +-- >>> unfoldEach = intercalateSepBy (Unfold.nilM (const (return ()))) (Stream.repeat ()) -- -- /Pre-release/ -{-# INLINE_NORMAL gintercalate #-} -gintercalate +{-# INLINE_NORMAL intercalateSepBy #-} +intercalateSepBy :: Monad m - => Unfold m a c -> Stream m a -> Unfold m b c -> Stream m b -> Stream m c -gintercalate - (Unfold istep1 inject1) (Stream step1 state1) - (Unfold istep2 inject2) (Stream step2 state2) = + => Unfold m b c -> Stream m b + -> Unfold m a c -> Stream m a + -> Stream m c +{- +intercalateSepBy u1 s1 u2 s2 = + Stream.concat $ interleaveSepBy (fmap (unfold u1) s1) (fmap (unfold u2) s2) +-} +intercalateSepBy + (Unfold istep2 inject2) (Stream step2 state2) + (Unfold istep1 inject1) (Stream step1 state1) = Stream step (ICALFirst state1 state2) where @@ -1129,39 +1403,75 @@ gintercalate return $ Yield x (ICALFirstInner s1 s2 i1 i2) -} --- > intercalateSuffix unf seed str = gintercalateSuffix unf str unf (repeatM seed) +-- | +-- +-- >>> gintercalate u1 s1 u2 s2 = Stream.intercalateSepBy u2 s2 u1 s1 +-- +{-# DEPRECATED gintercalate "Please use intercalateSepBy instead." 
#-} +{-# INLINE gintercalate #-} +gintercalate :: Monad m => + Unfold m a c -> Stream m a -> Unfold m b c -> Stream m b -> Stream m c +gintercalate u1 s1 u2 s2 = intercalateSepBy u2 s2 u1 s1 --- | 'intersperseMSuffix' followed by unfold and concat. +-- | Unfold each element of the stream, end each unfold by a sequence generated +-- by unfolding the supplied value. +-- +-- Definition: -- --- >>> intercalateSuffix u a = Stream.unfoldMany u . Stream.intersperseMSuffix a --- >>> intersperseMSuffix = Stream.intercalateSuffix Unfold.identity --- >>> unlines = Stream.intercalateSuffix Unfold.fromList "\n" +-- >>> unfoldEachEndBySeq a u = Stream.unfoldEach u . Stream.intersperseEndByM (return a) +-- >>> unfoldEachEndBySeq a u = intercalateEndBy u (Stream.repeat a) u +-- +-- Idioms: +-- +-- >>> intersperseEndByM x = Stream.unfoldEachEndBySeq x Unfold.identity +-- >>> unlines = Stream.unfoldEachEndBySeq "\n" Unfold.fromList +-- +-- Usage: -- -- >>> input = Stream.fromList ["abc", "def", "ghi"] --- >>> Stream.fold Fold.toList $ Stream.intercalateSuffix Unfold.fromList "\n" input +-- >>> Stream.toList $ Stream.unfoldEachEndBySeq "\n" Unfold.fromList input -- "abc\ndef\nghi\n" -- +{-# INLINE unfoldEachEndBySeq #-} +unfoldEachEndBySeq :: Monad m + => b -> Unfold m b c -> Stream m b -> Stream m c +unfoldEachEndBySeq seed unf = unfoldEach unf . intersperseEndByM (return seed) + +{-# DEPRECATED intercalateSuffix "Please use unfoldEachEndBySeq instead." #-} {-# INLINE intercalateSuffix #-} intercalateSuffix :: Monad m => Unfold m b c -> b -> Stream m b -> Stream m c -intercalateSuffix unf seed = unfoldMany unf . intersperseMSuffix (return seed) - --- > intercalate unf seed str = gintercalate unf str unf (repeatM seed) +intercalateSuffix u x = unfoldEachEndBySeq x u --- | 'intersperse' followed by unfold and concat. +-- | Unfold each element of the stream, separate the successive unfolds by a +-- sequence generated by unfolding the supplied value. 
-- --- >>> intercalate u a = Stream.unfoldMany u . Stream.intersperse a --- >>> intersperse = Stream.intercalate Unfold.identity --- >>> unwords = Stream.intercalate Unfold.fromList " " +-- Definition: +-- +-- >>> unfoldEachSepBySeq a u = Stream.unfoldEach u . Stream.intersperse a +-- >>> unfoldEachSepBySeq a u = Stream.intercalateSepBy u (Stream.repeat a) u +-- +-- Idioms: +-- +-- >>> intersperse x = Stream.unfoldEachSepBySeq x Unfold.identity +-- >>> unwords = Stream.unfoldEachSepBySeq " " Unfold.fromList +-- +-- Usage: -- -- >>> input = Stream.fromList ["abc", "def", "ghi"] --- >>> Stream.fold Fold.toList $ Stream.intercalate Unfold.fromList " " input +-- >>> Stream.toList $ Stream.unfoldEachSepBySeq " " Unfold.fromList input -- "abc def ghi" -- +{-# INLINE unfoldEachSepBySeq #-} +unfoldEachSepBySeq :: Monad m + => b -> Unfold m b c -> Stream m b -> Stream m c +unfoldEachSepBySeq seed unf str = unfoldEach unf $ intersperse seed str + +{-# DEPRECATED intercalate "Please use unfoldEachSepBySeq instead." #-} {-# INLINE intercalate #-} intercalate :: Monad m => Unfold m b c -> b -> Stream m b -> Stream m c -intercalate unf seed str = unfoldMany unf $ intersperse seed str +intercalate u x = unfoldEachSepBySeq x u ------------------------------------------------------------------------------ -- Folding @@ -1192,6 +1502,8 @@ data FIterState s f m a b -- generate the first fold, the fold is applied on the stream and the result of -- the fold is used to generate the next fold and so on. -- +-- Usage: +-- -- >>> import Data.Monoid (Sum(..)) -- >>> f x = return (Fold.take 2 (Fold.sconcat x)) -- >>> s = fmap Sum $ Stream.fromList [1..10] @@ -1252,15 +1564,30 @@ data ParseChunksState x inpBuf st pst = | ParseChunksYield x (ParseChunksState x inpBuf st pst) -- XXX return the remaining stream as part of the error. --- XXX This is in fact parseMany1 (a la foldMany1). Do we need a parseMany as --- well? 
-{-# INLINE_NORMAL parseManyD #-} -parseManyD + +-- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the +-- output stream. +-- +-- Usage: +-- +-- >>> s = Stream.fromList [1..10] +-- >>> parser = Parser.takeBetween 0 2 Fold.sum +-- >>> Stream.toList $ Stream.parseMany parser s +-- [Right 3,Right 7,Right 11,Right 15,Right 19] +-- +-- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse +-- combinator. +-- +-- Known Issues: When the parser fails there is no way to get the remaining +-- stream. +-- +{-# INLINE_NORMAL parseMany #-} +parseMany :: Monad m => PRD.Parser a m b -> Stream m a -> Stream m (Either ParseError b) -parseManyD (PRD.Parser pstep initial extract) (Stream step state) = +parseMany (PRD.Parser pstep initial extract) (Stream step state) = Stream stepOuter (ParseChunksInit [] state) where @@ -1466,29 +1793,14 @@ parseManyD (PRD.Parser pstep initial extract) (Stream step state) = stepOuter _ (ParseChunksYield a next) = return $ Yield a next --- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the --- output stream. --- --- Example: --- --- >>> s = Stream.fromList [1..10] --- >>> parser = Parser.takeBetween 0 2 Fold.sum --- >>> Stream.fold Fold.toList $ Stream.parseMany parser s --- [Right 3,Right 7,Right 11,Right 15,Right 19] --- --- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse --- combinator. --- --- Known Issues: When the parser fails there is no way to get the remaining --- stream. --- -{-# INLINE parseMany #-} -parseMany +{-# DEPRECATED parseManyD "Please use parseMany instead." #-} +{-# INLINE parseManyD #-} +parseManyD :: Monad m => PR.Parser a m b -> Stream m a -> Stream m (Either ParseError b) -parseMany = parseManyD +parseManyD = parseMany -- | Apply a stream of parsers to an input stream and emit the results in the -- output stream. 
@@ -1537,15 +1849,30 @@ data ConcatParseState c b inpBuf st p m a = inpBuf inpBuf (s -> a -> m (PRD.Step s b)) s (s -> m (PRD.Step s b)) | ConcatParseYield c (ConcatParseState c b inpBuf st p m a) --- XXX Review the changes -{-# INLINE_NORMAL parseIterateD #-} -parseIterateD +-- | Iterate a parser generating function on a stream. The initial value @b@ is +-- used to generate the first parser, the parser is applied on the stream and +-- the result is used to generate the next parser and so on. +-- +-- Example: +-- +-- >>> import Data.Monoid (Sum(..)) +-- >>> s = Stream.fromList [1..10] +-- >>> Stream.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s +-- [3,10,21,36,55,55] +-- +-- This is the streaming equivalent of monad like sequenced application of +-- parsers where next parser is dependent on the previous parser. +-- +-- /Pre-release/ +-- +{-# INLINE_NORMAL parseIterate #-} +parseIterate :: Monad m => (b -> PRD.Parser a m b) -> b -> Stream m a -> Stream m (Either ParseError b) -parseIterateD func seed (Stream step state) = +parseIterate func seed (Stream step state) = Stream stepOuter (ConcatParseInit [] state (func seed)) where @@ -1737,28 +2064,15 @@ parseIterateD func seed (Stream step state) = stepOuter _ (ConcatParseYield a next) = return $ Yield a next --- | Iterate a parser generating function on a stream. The initial value @b@ is --- used to generate the first parser, the parser is applied on the stream and --- the result is used to generate the next parser and so on. --- --- >>> import Data.Monoid (Sum(..)) --- >>> s = Stream.fromList [1..10] --- >>> Stream.fold Fold.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s --- [3,10,21,36,55,55] --- --- This is the streaming equivalent of monad like sequenced application of --- parsers where next parser is dependent on the previous parser. 
--- --- /Pre-release/ --- -{-# INLINE parseIterate #-} -parseIterate +{-# DEPRECATED parseIterateD "Please use parseIterate instead." #-} +{-# INLINE parseIterateD #-} +parseIterateD :: Monad m => (b -> PR.Parser a m b) -> b -> Stream m a -> Stream m (Either ParseError b) -parseIterate = parseIterateD +parseIterateD = parseIterate ------------------------------------------------------------------------------ -- Grouping @@ -1772,13 +2086,15 @@ data GroupByState st fs a b | GroupingYield !b (GroupByState st fs a b) | GroupingDone --- | The argument order of the comparison function in `groupsWhile` is --- different than that of `groupsBy`. +-- | Keep collecting items in a group as long as the comparison function +-- returns true. The comparison function is @cmp old new@ where @old@ is the +-- first item in the group and @new@ is the incoming item being tested for +-- membership of the group. The collected items are folded by the supplied +-- fold. -- --- In `groupsBy` the comparison function takes the next element as the first --- argument and the previous element as the second argument. In `groupsWhile` --- the first argument is the previous element and second argument is the next --- element. +-- Definition: +-- +-- >>> groupsWhile cmp f = Stream.parseMany (Parser.groupBy cmp f) {-# INLINE_NORMAL groupsWhile #-} groupsWhile :: Monad m => (a -> a -> Bool) @@ -1867,6 +2183,13 @@ groupsWhile cmp (Fold fstep initial _ final) (Stream step state) = stepOuter _ (GroupingYield _ _) = error "groupsWhile: Unreachable" stepOuter _ GroupingDone = return Stop +-- | The argument order of the comparison function in `groupsWhile` is +-- different than that of `groupsBy`. +-- +-- In `groupsBy` the comparison function takes the next element as the first +-- argument and the previous element as the second argument. In `groupsWhile` +-- the first argument is the previous element and second argument is the next +-- element. {-# DEPRECATED groupsBy "Please use groupsWhile instead. 
Please note the change in the argument order of the comparison function." #-} {-# INLINE_NORMAL groupsBy #-} groupsBy :: Monad m @@ -1876,6 +2199,12 @@ groupsBy :: Monad m -> Stream m b groupsBy cmp = groupsWhile (flip cmp) +-- | +-- +-- Definition: +-- +-- >>> groupsRollingBy cmp f = Stream.parseMany (Parser.groupByRolling cmp f) +-- {-# INLINE_NORMAL groupsRollingBy #-} groupsRollingBy :: Monad m => (a -> a -> Bool) @@ -1991,12 +2320,19 @@ data WordsByState st fs b | WordsByDone | WordsByYield !b (WordsByState st fs b) --- | Split the stream after stripping leading, trailing, and repeated separators --- as per the fold supplied. --- Therefore, @".a..b."@ with '.' as the separator would be parsed as --- @["a","b"]@. In other words, its like parsing words from whitespace --- separated text. - +-- | Split the stream after stripping leading, trailing, and repeated +-- separators determined by the predicate supplied. The tokens after splitting +-- are collected by the supplied fold. In other words, the tokens are parsed in +-- the same way as words are parsed from whitespace separated text. +-- +-- >>> f x = Stream.toList $ Stream.wordsBy (== '.') Fold.toList $ Stream.fromList x +-- >>> f "a.b" +-- ["a","b"] +-- >>> f "a..b" +-- ["a","b"] +-- >>> f ".a..b." +-- ["a","b"] +-- {-# INLINE_NORMAL wordsBy #-} wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b wordsBy predicate (Fold fstep initial _ final) (Stream step state) = @@ -2353,8 +2689,19 @@ takeEndBySeqWith withSep patArr (Stream step state) = let rb1 = RB.moveForward rb yield old $ TakeEndBySeqKRDone (len - SIZE_OF(a)) rb1 --- XXX takeEndOn, takeEndOn_ would be better names, "By" is for functions and --- "On" is for values. Change splitOn to splitBy. +-- | Take the stream until the supplied sequence is encountered. Take the +-- sequence as well and stop. 
+-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcdefgh" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- {-# INLINE takeEndBySeq #-} takeEndBySeq :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) @@ -2363,6 +2710,19 @@ takeEndBySeq -> Stream m a takeEndBySeq = takeEndBySeqWith True +-- | Take the stream until the supplied sequence is encountered. Do not take +-- the sequence. +-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq_ (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcde" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- {-# INLINE takeEndBySeq_ #-} takeEndBySeq_ :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) @@ -2409,6 +2769,8 @@ data SplitOnSeqState mba rb rh ck w fs s b x = | SplitOnSeqReinit (fs -> SplitOnSeqState mba rb rh ck w fs s b x) +-- XXX Need to fix empty stream split behavior + -- | Like 'splitOn' but splits the stream on a sequence of elements rather than -- a single element. Parses a sequence of tokens separated by an infixed -- separator e.g. @a;b;c@ is parsed as @a@, @b@, @c@. If the pattern is empty @@ -2416,18 +2778,18 @@ data SplitOnSeqState mba rb rh ck w fs s b x = -- -- Equivalent to the following: -- --- >>> splitOnSeq pat f = Stream.foldMany1 (Fold.takeEndBySeq_ pat f) +-- >>> splitSepBySeq_ pat f = Stream.foldMany0 (Fold.takeEndBySeq_ pat f) -- -- Uses Rabin-Karp algorithm for substring search. -- -{-# INLINE_NORMAL splitOnSeq #-} -splitOnSeq +{-# INLINE_NORMAL splitSepBySeq_ #-} +splitSepBySeq_, splitOnSeq :: forall m a b. 
(MonadIO m, Unbox a, Enum a, Eq a) => Array a -> Fold m a b -> Stream m a -> Stream m b -splitOnSeq patArr (Fold fstep initial _ final) (Stream step state) = +splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = Stream stepOuter SplitOnSeqInit where @@ -2752,6 +3114,8 @@ splitOnSeq patArr (Fold fstep initial _ final) (Stream step state) = let jump c = SplitOnSeqKRDone (len - SIZE_OF(a)) c rb1 yieldReinit jump b +RENAME(splitOnSeq,splitSepBySeq_) + {-# ANN type SplitOnSuffixSeqState Fuse #-} data SplitOnSuffixSeqState mba rb rh ck w fs s b x = SplitOnSuffixSeqInit @@ -2780,6 +3144,8 @@ data SplitOnSuffixSeqState mba rb rh ck w fs s b x = -- suffixed separator, the resulting split segments are fed to the fold @fld@. -- If @withSep@ is True then the separator sequence is also suffixed with the -- split segments. +-- +-- /Internal/ {-# INLINE_NORMAL splitOnSuffixSeq #-} splitOnSuffixSeq :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) @@ -3147,7 +3513,7 @@ splitOnSuffixSeq withSep patArr (Fold fstep initial _ final) (Stream step state) -- -- Equivalent to the following: -- --- >>> splitOnSeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) +-- >>> splitEndBySeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) -- -- Uses Rabin-Karp algorithm for substring search. -- @@ -3179,14 +3545,14 @@ splitEndBySeq_ = splitOnSuffixSeq False -- Implement this as a fold or a parser instead. -- This can be implemented easily using Rabin Karp + -- | Split post any one of the given patterns. 
-- -- /Unimplemented/ -{-# INLINE splitOnSuffixSeqAny #-} -splitOnSuffixSeqAny :: -- (Monad m, Unboxed a, Integral a) => +{-# INLINE splitEndBySeqOneOf #-} +splitEndBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitOnSuffixSeqAny _subseq _f _m = undefined - -- D.fromStreamD $ D.splitPostAny f subseq (D.toStreamD m) +splitEndBySeqOneOf _subseq _f _m = undefined -- | Split on a prefixed separator element, dropping the separator. The -- supplied 'Fold' is applied on the split segments. @@ -3237,10 +3603,10 @@ splitOnSuffixSeqAny _subseq _f _m = undefined -- > Stream.splitOnPrefix (== '.') Fold.toList . Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList === id -- -- /Unimplemented/ -{-# INLINE splitOnPrefix #-} -splitOnPrefix :: -- (IsStream t, MonadCatch m) => +{-# INLINE splitBeginBy_ #-} +splitBeginBy_ :: -- (MonadCatch m) => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitOnPrefix _predicate _f = undefined +splitBeginBy_ _predicate _f = undefined -- parseMany (Parser.sliceBeginBy predicate f) -- Int list examples for splitOn: @@ -3274,10 +3640,10 @@ splitOnPrefix _predicate _f = undefined -- -- /Unimplemented/ -- -{-# INLINE splitOnAny #-} -splitOnAny :: -- (Monad m, Unboxed a, Integral a) => +{-# INLINE splitSepBySeqOneOf #-} +splitSepBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitOnAny _subseq _f _m = +splitSepBySeqOneOf _subseq _f _m = undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) ------------------------------------------------------------------------------ diff --git a/core/src/Streamly/Internal/Data/Stream/Top.hs b/core/src/Streamly/Internal/Data/Stream/Top.hs index ea6474f1d7..7937f6657a 100644 --- a/core/src/Streamly/Internal/Data/Stream/Top.hs +++ b/core/src/Streamly/Internal/Data/Stream/Top.hs @@ -48,10 +48,6 @@ module Streamly.Internal.Data.Stream.Top ( - -- * Sampling - -- | Value 
agnostic filtering. - strideFromThen - -- * Straight Joins -- | These are set-like operations but not exactly set operations because -- streams are not necessarily sets, they may have duplicated elements. @@ -59,7 +55,7 @@ module Streamly.Internal.Data.Stream.Top -- types, therefore, they have quadratic performance characterstics. For -- better performance using Set or Map structures see the -- Streamly.Internal.Data.Stream.Container module. - , intersectBy + intersectBy , deleteFirstsBy , unionBy @@ -96,25 +92,6 @@ import Prelude hiding (filter, zipWith, concatMap, concat) #include "DocTestDataStream.hs" ------------------------------------------------------------------------------- --- Sampling ------------------------------------------------------------------------------- - --- XXX We can implement this using addition instead of "mod" to make it more --- efficient. - --- | @strideFromthen offset stride@ takes the element at @offset@ index and --- then every element at strides of @stride@. --- --- >>> Stream.fold Fold.toList $ Stream.strideFromThen 2 3 $ Stream.enumerateFromTo 0 10 --- [2,5,8] --- -{-# INLINE strideFromThen #-} -strideFromThen :: Monad m => Int -> Int -> Stream m a -> Stream m a -strideFromThen offset stride = - Stream.with Stream.indexed Stream.filter - (\(i, _) -> i >= offset && (i - offset) `mod` stride == 0) - ------------------------------------------------------------------------------ -- SQL Joins ------------------------------------------------------------------------------ @@ -419,7 +396,7 @@ sortedDeleteFirstsBy _eq _s1 _s2 = undefined -- | Returns the first stream appended with those unique elements from the -- second stream that are not already present in the first stream. Note that -- this is not a commutative operation unlike a set union, argument order --- matters. The behavior is similar to the 'Data.List.unionBy'. +-- matters. The behavior is similar to 'Data.List.unionBy'. 
-- -- Equivalent to the following except that @s2@ is evaluated only once: -- diff --git a/core/src/Streamly/Internal/Data/Stream/Transform.hs b/core/src/Streamly/Internal/Data/Stream/Transform.hs index 765d047e7b..268dc1776d 100644 --- a/core/src/Streamly/Internal/Data/Stream/Transform.hs +++ b/core/src/Streamly/Internal/Data/Stream/Transform.hs @@ -7,8 +7,6 @@ -- Maintainer : streamly@composewell.com -- Stability : experimental -- Portability : GHC --- --- "Streamly.Internal.Data.Pipe" might ultimately replace this module. -- A few functions in this module have been adapted from the vector package -- (c) Roman Leshchinskiy. See the notes in specific combinators. @@ -37,7 +35,7 @@ module Streamly.Internal.Data.Stream.Transform , pipe -- * Splitting - , splitOn + , splitSepBy_ -- * Ad-hoc Scans -- | Left scans. Stateful, mostly one-to-one maps. @@ -66,43 +64,63 @@ module Streamly.Internal.Data.Stream.Transform , scanlx' -- * Filtering + -- delete is for once like insert, filter is for many like intersperse. + -- | Produce a subset of the stream. , with , postscanlMaybe - , filter + , filter -- retainBy , filterM - , deleteBy -- deleteOnceBy? + , deleteBy -- deleteOnceBy/deleteFirstBy? , uniqBy , uniq , prune , repeated + -- * Sampling + -- | Value agnostic filtering. + , sampleFromThen + -- keepEvery/filterEvery -- sampling + -- deleteEvery/dropEvery/removeEvery -- dual of intersperseEvery + -- deintersperse - drop infixed elements + -- * Trimming -- | Produce a subset of the stream trimmed at ends. , initNonEmpty , tailNonEmpty - , takeWhileLast - , takeWhileAround , drop , dropWhile , dropWhileM + + -- * Trimming from end + -- | Ring array based or buffering operations. + -- + , takeWhileLast + , takeWhileAround , dropLast , dropWhileLast , dropWhileAround -- * Inserting Elements - -- | Produce a superset of the stream. - , insertBy + -- insert is for once like delete, intersperse is for many like filter + -- | Produce a superset of the stream. 
Value agnostic insertion. , intersperse , intersperseM - , intersperseMWith - , intersperseMSuffix - , intersperseMSuffixWith + , intersperseEveryM + , intersperseEndByM + , intersperseEndByEveryM + + -- Value aware insertion. + , insertBy -- insertCmpBy + -- insertBeforeBy + -- insertAfterBy + -- intersperseBeforeBy + -- intersperseAfterBy -- * Inserting Side Effects , intersperseM_ - , intersperseMSuffix_ - , intersperseMPrefix_ + , intersperseEndByM_ + , intersperseBeginByM_ , delay , delayPre @@ -149,9 +167,16 @@ module Streamly.Internal.Data.Stream.Transform , scan , scanMany , scanMaybe + , intersperseMSuffix + , intersperseMSuffixWith + , intersperseMSuffix_ + , intersperseMPrefix_ + , strideFromThen + , splitOn ) where +#include "deprecation.h" #include "inline.hs" import Control.Concurrent (threadDelay) @@ -1164,6 +1189,28 @@ repeated :: -- (Monad m, Eq a) => Stream m a -> Stream m a repeated = undefined +------------------------------------------------------------------------------ +-- Sampling +------------------------------------------------------------------------------ + +-- XXX We can implement this using addition instead of "mod" to make it more +-- efficient. + +-- | @sampleFromThen offset stride@ takes the element at @offset@ index and +-- then every element at strides of @stride@. 
+-- +-- >>> Stream.fold Fold.toList $ Stream.sampleFromThen 2 3 $ Stream.enumerateFromTo 0 10 +-- [2,5,8] +-- +{-# INLINE sampleFromThen #-} +sampleFromThen, strideFromThen :: Monad m => + Int -> Int -> Stream m a -> Stream m a +sampleFromThen offset stride = + with indexed filter + (\(i, _) -> i >= offset && (i - offset) `mod` stride == 0) + +RENAME(strideFromThen,sampleFromThen) + ------------------------------------------------------------------------------ -- Trimming ------------------------------------------------------------------------------ @@ -1378,21 +1425,13 @@ data LoopState x s = FirstYield s | InterspersingYield s | YieldAndCarry x s --- intersperseM = intersperseMWith 1 - --- | Insert an effect and its output before consuming an element of a stream --- except the first one. +-- | Effectful variant of 'intersperse'. Insert an effect and its output +-- between successive elements of a stream. It does nothing if stream has less +-- than two elements. -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.trace putChar $ Stream.intersperseM (putChar '.' >> return ',') input --- h.,e.,l.,l.,o"h,e,l,l,o" --- --- Be careful about the order of effects. In the above example we used trace --- after the intersperse, if we use it before the intersperse the output would --- be he.l.l.o."h,e,l,l,o". +-- Definition: -- --- >>> Stream.fold Fold.toList $ Stream.intersperseM (putChar '.' >> return ',') $ Stream.trace putChar input --- he.l.l.o."h,e,l,l,o" +-- >>> intersperseM x = Stream.interleaveSepBy (Stream.repeatM x) -- {-# INLINE_NORMAL intersperseM #-} intersperseM :: Monad m => m a -> Stream m a -> Stream m a @@ -1418,22 +1457,36 @@ intersperseM m (Stream step state) = Stream step' (FirstYield state) step' _ (YieldAndCarry x st) = return $ Yield x (InterspersingYield st) --- | Insert a pure value between successive elements of a stream. +-- | Insert a pure value between successive elements of a stream.
It does +-- nothing if stream has less than two elements. -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.intersperse ',' input --- "h,e,l,l,o" +-- Definition: +-- +-- >>> intersperse x = intersperseM (return x) +-- >>> intersperse x = Stream.unfoldEachSepBy x Unfold.identity +-- >>> intersperse x = Stream.unfoldEachSepBySeq x Unfold.identity +-- >>> intersperse x = Stream.interleaveSepBy (Stream.repeat x) +-- +-- Example: +-- +-- >>> f x y = Stream.toList $ Stream.intersperse x $ Stream.fromList y +-- >>> f ',' "abc" +-- "a,b,c" +-- >>> f ',' "a" +-- "a" -- {-# INLINE intersperse #-} intersperse :: Monad m => a -> Stream m a -> Stream m a intersperse a = intersperseM (return a) --- | Insert a side effect before consuming an element of a stream except the --- first one. +-- | Perform a side effect between two successive elements of a stream. It does +-- nothing if the stream has less than two elements. -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.drain $ Stream.trace putChar $ Stream.intersperseM_ (putChar '.') input --- h.e.l.l.o +-- >>> f x y = Stream.fold Fold.drain $ Stream.trace putChar $ Stream.intersperseM_ x $ Stream.fromList y +-- >>> f (putChar '.') "abc" +-- a.b.c +-- >>> f (putChar '.') "a" +-- a -- -- /Pre-release/ {-# INLINE_NORMAL intersperseM_ #-} @@ -1453,31 +1506,51 @@ intersperseM_ m (Stream step1 state1) = Stream step (Left (pure (), state1)) -- | Intersperse a monadic action into the input stream after every @n@ -- elements. 
-- +-- Definition: +-- +-- >> intersperseEveryM n x = Stream.interleaveEverySepBy n (Stream.repeatM x) +-- +-- Idioms: +-- +-- >>> intersperseM = intersperseEveryM 1 +-- >>> intersperse x = intersperseEveryM 1 (return x) +-- +-- Usage: +-- -- >> input = Stream.fromList "hello" --- >> Stream.fold Fold.toList $ Stream.intersperseMWith 2 (return ',') input +-- >> Stream.toList $ Stream.intersperseEveryM 2 (return ',') input -- "he,ll,o" -- -- /Unimplemented/ -{-# INLINE intersperseMWith #-} -intersperseMWith :: -- Monad m => +{-# INLINE intersperseEveryM #-} +intersperseEveryM :: -- Monad m => Int -> m a -> Stream m a -> Stream m a -intersperseMWith _n _f _xs = undefined +intersperseEveryM _n _f _xs = undefined data SuffixState s a = SuffixElem s | SuffixSuffix s | SuffixYield a (SuffixState s a) --- | Insert an effect and its output after consuming an element of a stream. +-- | Insert an effect and its output after every element of a stream. -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.trace putChar $ Stream.intersperseMSuffix (putChar '.' >> return ',') input --- h.,e.,l.,l.,o.,"h,e,l,l,o," +-- Definition: +-- +-- >>> intersperseEndByM x = Stream.interleaveEndBy (Stream.repeatM x) +-- +-- Usage: +-- +-- >>> f x y = Stream.toList $ Stream.intersperseEndByM (pure x) $ Stream.fromList y +-- >>> f ',' "abc" +-- "a,b,c," +-- >>> f ',' "a" +-- "a," -- -- /Pre-release/ -{-# INLINE_NORMAL intersperseMSuffix #-} -intersperseMSuffix :: forall m a. Monad m => m a -> Stream m a -> Stream m a -intersperseMSuffix action (Stream step state) = Stream step' (SuffixElem state) +{-# INLINE_NORMAL intersperseEndByM #-} +intersperseEndByM, intersperseMSuffix :: forall m a. 
Monad m => + m a -> Stream m a -> Stream m a +intersperseEndByM action (Stream step state) = Stream step' (SuffixElem state) where {-# INLINE_LATE step' #-} step' gst (SuffixElem st) = do @@ -1492,17 +1565,23 @@ intersperseMSuffix action (Stream step state) = Stream step' (SuffixElem state) step' _ (SuffixYield x next) = return $ Yield x next --- | Insert a side effect after consuming an element of a stream. +RENAME(intersperseMSuffix,intersperseEndByM) + +-- | Insert an effect after every element of a stream. -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.intersperseMSuffix_ (threadDelay 1000000) input --- "hello" +-- Example: +-- +-- >>> f x y = Stream.fold Fold.drain $ Stream.trace putChar $ Stream.intersperseEndByM_ x $ Stream.fromList y +-- >>> f (putChar '.') "abc" +-- a.b.c. +-- >>> f (putChar '.') "a" +-- a. -- -- /Pre-release/ -- -{-# INLINE_NORMAL intersperseMSuffix_ #-} -intersperseMSuffix_ :: Monad m => m b -> Stream m a -> Stream m a -intersperseMSuffix_ m (Stream step1 state1) = Stream step (Left state1) +{-# INLINE_NORMAL intersperseEndByM_ #-} +intersperseEndByM_, intersperseMSuffix_ :: Monad m => m b -> Stream m a -> Stream m a +intersperseEndByM_ m (Stream step1 state1) = Stream step (Left state1) where {-# INLINE_LATE step #-} step gst (Left st) = do @@ -1514,6 +1593,8 @@ intersperseMSuffix_ m (Stream step1 state1) = Stream step (Left state1) step _ (Right st) = m >> return (Skip (Left st)) +RENAME(intersperseMSuffix_,intersperseEndByM_) + data SuffixSpanState s a = SuffixSpanElem s Int | SuffixSpanSuffix s @@ -1521,19 +1602,28 @@ data SuffixSpanState s a | SuffixSpanLast | SuffixSpanStop --- | Like 'intersperseMSuffix' but intersperses an effectful action into the --- input stream after every @n@ elements and after the last element. +-- | Like 'intersperseEndByM' but intersperses an effectful action into the +-- input stream after every @n@ elements and also after the last element. 
+-- +-- Example: -- -- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.intersperseMSuffixWith 2 (return ',') input +-- >>> Stream.toList $ Stream.intersperseEndByEveryM 2 (return ',') input -- "he,ll,o," +-- >>> f n x y = Stream.toList $ Stream.intersperseEndByEveryM n (pure x) $ Stream.fromList y +-- >>> f 2 ',' "abcdef" +-- "ab,cd,ef," +-- >>> f 2 ',' "abcdefg" +-- "ab,cd,ef,g," +-- >>> f 2 ',' "a" +-- "a," -- -- /Pre-release/ -- -{-# INLINE_NORMAL intersperseMSuffixWith #-} -intersperseMSuffixWith :: forall m a. Monad m +{-# INLINE_NORMAL intersperseEndByEveryM #-} +intersperseEndByEveryM, intersperseMSuffixWith :: forall m a. Monad m => Int -> m a -> Stream m a -> Stream m a -intersperseMSuffixWith n action (Stream step state) = +intersperseEndByEveryM n action (Stream step state) = Stream step' (SuffixSpanElem state n) where {-# INLINE_LATE step' #-} @@ -1555,23 +1645,31 @@ intersperseMSuffixWith n action (Stream step state) = step' _ SuffixSpanStop = return Stop --- | Insert a side effect before consuming an element of a stream. +RENAME(intersperseMSuffixWith,intersperseEndByEveryM) + +-- | Insert a side effect before every element of a stream. -- -- Definition: -- --- >>> intersperseMPrefix_ m = Stream.mapM (\x -> void m >> return x) +-- >>> intersperseBeginByM_ = Stream.trace_ +-- >>> intersperseBeginByM_ m = Stream.mapM (\x -> void m >> return x) -- --- >>> input = Stream.fromList "hello" --- >>> Stream.fold Fold.toList $ Stream.trace putChar $ Stream.intersperseMPrefix_ (putChar '.' >> return ',') input --- .h.e.l.l.o"hello" +-- Usage: +-- +-- >>> f x y = Stream.fold Fold.drain $ Stream.trace putChar $ Stream.intersperseBeginByM_ x $ Stream.fromList y +-- >>> f (putChar '.') "abc" +-- .a.b.c -- -- Same as 'trace_'.
-- -- /Pre-release/ -- -{-# INLINE intersperseMPrefix_ #-} -intersperseMPrefix_ :: Monad m => m b -> Stream m a -> Stream m a -intersperseMPrefix_ m = mapM (\x -> void m >> return x) +{-# INLINE intersperseBeginByM_ #-} +intersperseBeginByM_, intersperseMPrefix_ :: Monad m => + m b -> Stream m a -> Stream m a +intersperseBeginByM_ m = mapM (\x -> void m >> return x) + +RENAME(intersperseMPrefix_,intersperseBeginByM_) ------------------------------------------------------------------------------ -- Inserting Time @@ -2019,6 +2117,48 @@ catEithers = fmap (either id id) -- Splitting ------------------------------------------------------------------------------ +-- Design note: If we use splitSepBy_ on an empty stream what should be the +-- result? Let's try the splitOn function in the "split" package: +-- +-- > splitOn "a" "" +-- [""] +-- +-- Round tripping the result through intercalate gives identity: +-- +-- > intercalate "a" [""] +-- "" +-- +-- Now let's try intercalate on empty list: +-- +-- > intercalate "a" [] +-- "" +-- +-- Round tripping it with splitOn is not identity: +-- +-- > splitOn "a" "" +-- [""] +-- +-- Because intercalate flattens the two layers, both [] and [""] produce the +-- same result after intercalate. Therefore, inverse of intercalate is not +-- possible. We have to choose one of the two options for splitting an empty +-- stream. +-- +-- Choosing empty stream as the result of splitting empty stream makes better +-- sense. This is different from the split package's choice. Splitting an empty +-- stream resulting into a non-empty stream seems a bit odd. Also, splitting +-- empty stream to empty stream is consistent with splitEndBy operation as +-- well. 
+ +{-# ANN type SplitSepBy Fuse #-} +data SplitSepBy s fs b a + = SplitSepByInit s + | SplitSepByInitFold0 s + | SplitSepByInitFold1 s a + | SplitSepByCheck s a fs + | SplitSepByNext s fs + | SplitSepByYield b (SplitSepBy s fs b a) + | SplitSepByDone + -- | Split on an infixed separator element, dropping the separator. The -- supplied 'Fold' is applied on the split segments. Splits the stream on -- separator elements determined by the supplied predicate, separator is @@ -2026,53 +2166,115 @@ catEithers = fmap (either id id) -- -- Definition: -- --- >>> splitOn p f = Stream.foldMany1 (Fold.takeEndBy_ p f) -- --- >>> splitOn' p xs = Stream.fold Fold.toList $ Stream.splitOn p Fold.toList (Stream.fromList xs) --- >>> splitOn' (== '.') "a.b" +-- Usage: +-- +-- >>> splitOn p xs = Stream.fold Fold.toList $ Stream.splitSepBy_ p Fold.toList (Stream.fromList xs) +-- >>> splitOn (== '.') "a.b" -- ["a","b"] -- --- An empty stream is folded to the default value of the fold: +-- Splitting an empty stream results in an empty stream i.e. zero splits: -- --- >>> splitOn' (== '.') "" --- [""] +-- >>> splitOn (== '.') "" +-- [] +-- +-- If the stream does not contain the separator then it results in a single +-- split: +-- +-- >>> splitOn (== '.') "abc" +-- ["abc"] -- -- If one or both sides of the separator are missing then the empty segment on -- that side is folded to the default output of the fold: -- --- >>> splitOn' (== '.') "." +-- >>> splitOn (== '.') "." -- ["",""] -- --- >>> splitOn' (== '.') ".a" +-- >>> splitOn (== '.') ".a" -- ["","a"] -- --- >>> splitOn' (== '.') "a." +-- >>> splitOn (== '.') "a." -- ["a",""] -- --- >>> splitOn' (== '.') "a..b" +-- >>> splitOn (== '.') "a..b" -- ["a","","b"] -- --- splitOn is an inverse of intercalating single element: +-- 'splitSepBy_' is an inverse of 'unfoldEachSepBy': -- --- > Stream.intercalate (Stream.fromPure '.') Unfold.fromList . Stream.splitOn (== '.') Fold.toList === id +-- > Stream.unfoldEachSepBy '.' Unfold.fromList . 
Stream.splitSepBy_ (== '.') Fold.toList === id -- -- Assuming the input stream does not contain the separator: -- --- > Stream.splitOn (== '.') Fold.toList . Stream.intercalate (Stream.fromPure '.') Unfold.fromList === id +-- > Stream.splitSepBy_ (== '.') Fold.toList . Stream.unfoldEachSepBy '.' Unfold.fromList === id +-- +{-# INLINE splitSepBy_ #-} +splitSepBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b +-- We can express the infix splitting in terms of optional suffix split +-- fold. After applying a suffix split fold repeatedly if the last segment +-- ends with a suffix then we need to return the default output of the fold +-- after that to make it an infix split. -- +-- Alternately, we can also express it using an optional prefix split fold. +-- If the first segment starts with a prefix then we need to emit the +-- default output of the fold before that to make it an infix split, and +-- then apply prefix split fold repeatedly. +-- +splitSepBy_ predicate (Fold fstep initial _ final) (Stream step1 state1) = + Stream step (SplitSepByInit state1) + + where + + {-# INLINE_LATE step #-} + step gst (SplitSepByInit st) = do + r <- step1 (adaptState gst) st + case r of + Yield x s -> return $ Skip $ SplitSepByInitFold1 s x + Skip s -> return $ Skip (SplitSepByInit s) + Stop -> return Stop + + step _ (SplitSepByInitFold0 st) = do + fres <- initial + return + $ Skip + $ case fres of + FL.Done b -> SplitSepByYield b (SplitSepByInitFold0 st) + FL.Partial fs -> SplitSepByNext st fs + + step _ (SplitSepByInitFold1 st x) = do + fres <- initial + return + $ Skip + $ case fres of + FL.Done b -> SplitSepByYield b (SplitSepByInitFold1 st x) + FL.Partial fs -> SplitSepByCheck st x fs + + step _ (SplitSepByCheck st x fs) = do + if predicate x + then do + b <- final fs + return $ Skip $ SplitSepByYield b (SplitSepByInitFold0 st) + else do + fres <- fstep fs x + return + $ Skip + $ case fres of + FL.Done b -> SplitSepByYield b (SplitSepByInitFold0 st) + 
FL.Partial fs1 -> SplitSepByNext st fs1 + + step gst (SplitSepByNext st fs) = do + r <- step1 (adaptState gst) st + case r of + Yield x s -> return $ Skip $ SplitSepByCheck s x fs + Skip s -> return $ Skip (SplitSepByNext s fs) + Stop -> do + b <- final fs + return $ Skip $ SplitSepByYield b SplitSepByDone + + step _ (SplitSepByYield b next) = return $ Yield b next + step _ SplitSepByDone = return Stop + +{-# DEPRECATED splitOn "Please use splitSepBy_ instead. Note the difference in behavior on splitting empty stream." #-} {-# INLINE splitOn #-} splitOn :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b splitOn predicate f = - -- We can express the infix splitting in terms of optional suffix split - -- fold. After applying a suffix split fold repeatedly if the last segment - -- ends with a suffix then we need to return the default output of the fold - -- after that to make it an infix split. - -- - -- Alternately, we can also express it using an optional prefix split fold. - -- If the first segment starts with a prefix then we need to emit the - -- default output of the fold before that to make it an infix split, and - -- then apply prefix split fold repeatedly. - -- - -- Since a suffix split fold can be easily expressed using a - -- non-backtracking fold, we use that. - foldMany1 (FL.takeEndBy_ predicate f) + foldManyPost (FL.takeEndBy_ predicate f) diff --git a/core/src/Streamly/Internal/Data/Stream/Type.hs b/core/src/Streamly/Internal/Data/Stream/Type.hs index 8ec0e3aa5f..f38de3485b 100644 --- a/core/src/Streamly/Internal/Data/Stream/Type.hs +++ b/core/src/Streamly/Internal/Data/Stream/Type.hs @@ -102,10 +102,12 @@ module Streamly.Internal.Data.Stream.Type , crossApplySnd , crossWith , cross + , loop -- forEach + , loopBy -- * Unfold Many , ConcatMapUState (..) 
- , unfoldMany + , unfoldEach -- * Concat -- | Generate streams by mapping a stream generator on each element of an @@ -130,8 +132,8 @@ module Streamly.Internal.Data.Stream.Type , FoldMany (..) -- for inspection testing , FoldManyPost (..) , foldMany - , foldMany1 , foldManyPost + , foldManySepBy , groupsOf , refoldMany , refoldIterateM @@ -150,9 +152,11 @@ module Streamly.Internal.Data.Stream.Type -- * Deprecated , sliceOnSuffix + , unfoldMany ) where +#include "deprecation.h" #include "inline.hs" #if !MIN_VERSION_base(4,18,0) @@ -186,9 +190,7 @@ import Streamly.Internal.Data.Unfold.Type (Unfold(..)) import qualified Streamly.Internal.Data.Fold.Type as FL hiding (foldr) import qualified Streamly.Internal.Data.StreamK.Type as K -#ifdef USE_UNFOLDS_EVERYWHERE import qualified Streamly.Internal.Data.Unfold.Type as Unfold -#endif #include "DocTestDataStream.hs" @@ -465,7 +467,7 @@ foldBreak fld strm = do nil = Stream (\_ _ -> return Stop) () -- >>> fold f = Fold.extractM . Stream.foldAddLazy f --- >>> fold f = Stream.fold Fold.one . Stream.foldMany1 f +-- >>> fold f = Stream.fold Fold.one . Stream.foldMany0 f -- >>> fold f = Fold.extractM <=< Stream.foldAdd f -- | Fold a stream using the supplied left 'Fold' and reducing the resulting @@ -1165,6 +1167,8 @@ zipWith f = zipWithM (\a b -> return (f a b)) -- Combine N Streams - concatAp ------------------------------------------------------------------------------ +-- XXX unfoldApplyEach + -- | Apply a stream of functions to a stream of values and flatten the results. -- -- Note that the second stream is evaluated multiple times. @@ -1256,6 +1260,10 @@ instance Applicative f => Applicative (Stream f) where (<*) = crossApplyFst -} +-- XXX We can use @Stream Identity b@ as the second stream to avoid running +-- effects multiple times. Or it could be an array or an unfold i.e. +-- unfoldCross. 
+ -- | -- Definition: -- @@ -1289,8 +1297,28 @@ crossWith f m1 m2 = fmap f m1 `crossApply` m2 cross :: Monad m => Stream m a -> Stream m b -> Stream m (a, b) cross = crossWith (,) +-- crossWith/cross should ideally use Stream m b as the first stream, because +-- we are transforming Stream m a using that. We provide loop with arguments +-- flipped. + +-- | Loop the supplied stream (first argument) around each element of the input +-- stream (second argument) generating tuples. This is an argument flipped +-- version of 'cross'. +{-# INLINE loop #-} +loop :: Monad m => Stream m b -> Stream m a -> Stream m (a, b) +loop = crossWith (\b a -> (a,b)) + +-- | Loop by unfold. Unfold a value into a stream and nest it with the input +-- stream. This is much faster than 'loop' due to stream fusion. +{-# INLINE loopBy #-} +loopBy :: Monad m => Unfold m x b -> x -> Stream m a -> Stream m (a, b) +loopBy u x s = + let u1 = Unfold.lmap snd u + u2 = Unfold.map2 (\i b -> (fst i, b)) u1 + in unfoldEach u2 $ fmap (, x) s + ------------------------------------------------------------------------------ --- Combine N Streams - unfoldMany +-- Combine N Streams - unfoldEach ------------------------------------------------------------------------------ {-# ANN type ConcatMapUState Fuse #-} @@ -1298,7 +1326,7 @@ data ConcatMapUState o i = ConcatMapUOuter o | ConcatMapUInner o i --- | @unfoldMany unfold stream@ uses @unfold@ to map the input stream elements +-- | @unfoldEach unfold stream@ uses @unfold@ to map the input stream elements -- to streams and then flattens the generated streams into a single output -- stream. @@ -1311,9 +1339,9 @@ data ConcatMapUState o i = -- 'concatMap' this can fuse the 'Unfold' code with the inner loop and -- therefore provide many times better performance. 
-- -{-# INLINE_NORMAL unfoldMany #-} -unfoldMany :: Monad m => Unfold m a b -> Stream m a -> Stream m b -unfoldMany (Unfold istep inject) (Stream ostep ost) = +{-# INLINE_NORMAL unfoldEach #-} +unfoldEach, unfoldMany :: Monad m => Unfold m a b -> Stream m a -> Stream m b +unfoldEach (Unfold istep inject) (Stream ostep ost) = Stream step (ConcatMapUOuter ost) where {-# INLINE_LATE step #-} @@ -1333,6 +1361,8 @@ unfoldMany (Unfold istep inject) (Stream ostep ost) = Skip i' -> Skip (ConcatMapUInner o i') Stop -> Skip (ConcatMapUOuter o) +RENAME(unfoldMany,unfoldEach) + ------------------------------------------------------------------------------ -- Combine N Streams - concatMap ------------------------------------------------------------------------------ @@ -1344,7 +1374,7 @@ unfoldMany (Unfold istep inject) (Stream ostep ost) = -- generation function is monadic, unlike 'concatMap', it can produce an -- effect at the beginning of each iteration of the inner loop. -- --- See 'unfoldMany' for a fusible alternative. +-- See 'unfoldEach' for a fusible alternative. -- {-# INLINE_NORMAL concatMapM #-} concatMapM :: Monad m => (a -> m (Stream m b)) -> Stream m a -> Stream m b @@ -1381,9 +1411,9 @@ concatMapM f (Stream step state) = Stream step' (Left state) -- -- >>> concatMap f = Stream.concatMapM (return . f) -- >>> concatMap f = Stream.concat . fmap f --- >>> concatMap f = Stream.unfoldMany (Unfold.lmap f Unfold.fromStream) +-- >>> concatMap f = Stream.unfoldEach (Unfold.lmap f Unfold.fromStream) -- --- See 'unfoldMany' for a fusible alternative. +-- See 'unfoldEach' for a fusible alternative. -- {-# INLINE concatMap #-} concatMap :: Monad m => (a -> Stream m b) -> Stream m a -> Stream m b @@ -1412,8 +1442,8 @@ concat = concatMap id -- >>> concatEffect = Stream.concat . lift -- requires (MonadTrans t) -- >>> concatEffect = join . 
lift -- requires (MonadTrans t, Monad (Stream m)) --- | Given a stream value in the underlying monad, lift and join the underlying --- monad with the stream monad. +-- | Flatten a stream generated by an effect i.e. concat the effect monad with +-- the stream monad. -- -- >>> concatEffect = Stream.concat . Stream.fromEffect -- >>> concatEffect eff = Stream.concatMapM (\() -> eff) (Stream.fromPure ()) @@ -1798,7 +1828,8 @@ data FoldManyPost s fs b a -- Note that using a closed fold e.g. @Fold.take 0@, would result in an -- infinite stream without consuming the input. -- --- Like foldMany1, "scan" should ideally be "scan1" always resulting in a +-- We can call foldManyPost as foldMany0, but we should probably remove it. +-- Like foldMany0, "scan" should ideally be "scan0" always resulting in a -- non-empty stream, and "postscan" should be called just "scan" because it is -- much more common. But those names cannot be changed now. @@ -1809,7 +1840,7 @@ data FoldManyPost s fs b a -- Example, empty stream, compare with 'foldMany': -- -- >>> f = Fold.take 2 Fold.toList --- >>> fmany = Stream.fold Fold.toList . Stream.foldMany1 f +-- >>> fmany = Stream.fold Fold.toList . Stream.foldManyPost f -- >>> fmany $ Stream.fromList [] -- [[]] -- @@ -1825,9 +1856,9 @@ data FoldManyPost s fs b a -- -- /Pre-release/ -- -{-# INLINE_NORMAL foldMany1 #-} -foldMany1 :: Monad m => Fold m a b -> Stream m a -> Stream m b -foldMany1 (Fold fstep initial _ final) (Stream step state) = +{-# INLINE_NORMAL foldManyPost #-} +foldManyPost :: Monad m => Fold m a b -> Stream m a -> Stream m b +foldManyPost (Fold fstep initial _ final) (Stream step state) = Stream step' (FoldManyPostStart state) where @@ -1860,10 +1891,13 @@ foldMany1 (Fold fstep initial _ final) (Stream step state) = step' _ (FoldManyPostYield b next) = return $ Yield b next step' _ FoldManyPostDone = return Stop -{-# DEPRECATED foldManyPost "Please use foldMany1 instead."
#-} -{-# INLINE foldManyPost #-} -foldManyPost :: Monad m => Fold m a b -> Stream m a -> Stream m b -foldManyPost = foldMany1 +-- | Apply fold f1 infix separated by fold f2. +-- +-- /Unimplemented/ +{-# INLINE_NORMAL foldManySepBy #-} +foldManySepBy :: -- Monad m => + Fold m a b -> Fold m a b -> Stream m a -> Stream m b +foldManySepBy _f1 _f2 = undefined {-# ANN type FoldMany Fuse #-} data FoldMany s fs b a @@ -1878,7 +1912,7 @@ data FoldMany s fs b a -- | Apply a terminating 'Fold' repeatedly on a stream and emit the results in -- the output stream. If the last fold is empty, it's result is not emitted. -- This means if the input stream is empty the result is also an empty stream. --- See 'foldMany1' for an alternate behavior which always results in a +-- See 'foldManyPost' for an alternate behavior which always results in a -- non-empty stream even if the input stream is empty. -- -- Definition: @@ -1949,10 +1983,14 @@ foldMany (Fold fstep initial _ final) (Stream step state) = -- | Group the input stream into groups of @n@ elements each and then fold each -- group using the provided fold function. -- --- @groupsOf n f = foldMany (FL.take n f)@ +-- Definition: +-- +-- >>> groupsOf n f = Stream.foldMany (Fold.take n f) -- --- >>> Stream.toList $ Stream.groupsOf 2 Fold.sum (Stream.enumerateFromTo 1 10) --- [3,7,11,15,19] +-- Usage: +-- +-- >>> Stream.toList $ Stream.groupsOf 2 Fold.toList (Stream.enumerateFromTo 1 10) +-- [[1,2],[3,4],[5,6],[7,8],[9,10]] -- -- This can be considered as an n-fold version of 'take' where we apply -- 'take' repeatedly on the leftover stream until the stream exhausts. @@ -2075,7 +2113,9 @@ indexerBy (Fold step1 initial1 extract1 _final) n = extract (Tuple' i s) = (i,) <$> extract1 s --- | Like 'splitOnSuffix' but generates a stream of (index, len) tuples marking +-- XXX rename to indicesEndBy + +-- | Like 'splitEndBy' but generates a stream of (index, len) tuples marking -- the places where the predicate matches in the stream. 
-- -- /Pre-release/ diff --git a/core/src/Streamly/Internal/Data/StreamK/Type.hs b/core/src/Streamly/Internal/Data/StreamK/Type.hs index 23808a1112..c43d9b87db 100644 --- a/core/src/Streamly/Internal/Data/StreamK/Type.hs +++ b/core/src/Streamly/Internal/Data/StreamK/Type.hs @@ -115,8 +115,8 @@ module Streamly.Internal.Data.StreamK.Type -- ** Interleave , interleave - , interleaveFst - , interleaveMin + , interleaveEndBy' + , interleaveSepBy -- ** Cross Product , crossApplyWith @@ -144,6 +144,10 @@ module Streamly.Internal.Data.StreamK.Type -- * Buffered Operations , foldlS , reverse + + -- * Deprecated + , interleaveFst + , interleaveMin ) where @@ -1355,6 +1359,16 @@ bindWith par m1 f = go m1 -- argument specifies a merge or concat function that is used to merge the -- streams generated by the map function. -- +-- For example, interleaving n streams in a left biased manner: +-- +-- >>> fromList = StreamK.fromStream . Stream.fromList +-- >>> toList = Stream.toList . StreamK.toStream +-- >>> lists = fromList [[1,5],[2,6],[3,7],[4,8]] +-- >>> toList $ StreamK.concatMapWith StreamK.interleave fromList lists +-- [1,2,5,3,6,4,7,8] +-- +-- For a fair interleaving example see 'mergeMapWith'. +-- {-# INLINE concatMapWith #-} concatMapWith :: @@ -1386,21 +1400,38 @@ concatMap_ f xs = buildS (\c n -> foldrSShared (\x b -> foldrSShared c b (unShare $ f x)) n xs) -} +-- XXX Instead of using "mergeMapWith interleave" we can implement an N-way +-- interleaving CPS combinator which behaves like unfoldEachInterleave. Instead +-- of pairing up the streams we just need to go yielding one element from each +-- stream and storing the remaining streams and then keep doing rounds through +-- those in a round robin fashion. This would be much like wAsync. + -- | Combine streams in pairs using a binary combinator, the resulting streams -- are then combined again in pairs recursively until we get to a single -- combined stream. The composition would thus form a binary tree. 
-- --- For example, you can sort a stream using merge sort like this: +-- For example, sorting a stream using merge sort: -- --- >>> s = StreamK.fromStream $ Stream.fromList [5,1,7,9,2] +-- >>> fromList = StreamK.fromStream . Stream.fromList +-- >>> toList = Stream.toList . StreamK.toStream -- >>> generate = StreamK.fromPure -- >>> combine = StreamK.mergeBy compare --- >>> Stream.fold Fold.toList $ StreamK.toStream $ StreamK.mergeMapWith combine generate s +-- >>> toList $ StreamK.mergeMapWith combine generate (fromList [5,1,7,9,2]) -- [1,2,5,7,9] -- +-- Interleaving n streams in a balanced manner: +-- +-- >>> lists = fromList [[1,4,7],[2,5,8],[3,6,9]] +-- >>> toList $ StreamK.mergeMapWith StreamK.interleave fromList lists +-- [1,3,2,6,4,9,5,7,8] +-- +-- See 'Streamly.Data.Stream.unfoldEachInterleave' for a much faster fused +-- version of the above example. +-- -- Note that if the stream length is not a power of 2, the binary tree composed --- by mergeMapWith would not be balanced, which may or may not be important --- depending on what you are trying to achieve. +-- by mergeMapWith is not balanced, which may or may not be important depending +-- on what you are trying to achieve. This also explains the order of the +-- output in the interleaving example above. -- -- /Caution: the stream of streams must be finite/ -- @@ -1670,13 +1701,24 @@ interleave m1 m2 = mkStream $ \st yld sng stp -> do yieldk a r = yld a (interleave m2 r) foldStream st yieldk single stop m1 -infixr 6 `interleaveFst` +-- Examples: +-- +-- >>> fromList = StreamK.fromStream . Stream.fromList +-- >>> toList = Stream.toList . StreamK.toStream +-- >>> f x y = toList $ StreamK.interleaveSepBy (fromList x) (fromList y) +-- +-- -- This is broken. +-- >> f "..." "abc" +-- "a.b.c" --- | Like `interleave` but stops interleaving as soon as the first stream stops. +-- >>> f ".." "abc" +-- "a.b.c" +-- >>> f "." 
"abc" +-- "a.bc" -- -{-# INLINE interleaveFst #-} -interleaveFst :: StreamK m a -> StreamK m a -> StreamK m a -interleaveFst m1 m2 = mkStream $ \st yld sng stp -> do +{-# INLINE interleaveSepBy #-} +interleaveSepBy :: StreamK m a -> StreamK m a -> StreamK m a +interleaveSepBy m2 m1 = mkStream $ \st yld sng stp -> do let yieldFirst a r = yld a (yieldSecond r m2) in foldStream st yieldFirst sng stp m1 @@ -1688,22 +1730,48 @@ interleaveFst m1 m2 = mkStream $ \st yld sng stp -> do yieldk a r = yld a (interleave s1 r) in foldStream st yieldk single stop s2 -infixr 6 `interleaveMin` +infixr 6 `interleaveFst` + +{-# DEPRECATED interleaveFst "Please use flip interleaveSepBy instead." #-} +{-# INLINE interleaveFst #-} +interleaveFst :: StreamK m a -> StreamK m a -> StreamK m a +interleaveFst = flip interleaveSepBy --- | Like `interleave` but stops interleaving as soon as any of the two streams --- stops. +-- | Interleave starting with the second stream; stop when either stream stops. -- -{-# INLINE interleaveMin #-} -interleaveMin :: StreamK m a -> StreamK m a -> StreamK m a -interleaveMin m1 m2 = mkStream $ \st yld _ stp -> do +-- Examples: +-- +-- >>> fromList = StreamK.fromStream . Stream.fromList +-- >>> toList = Stream.toList . StreamK.toStream +-- >>> f x y = toList $ StreamK.interleaveEndBy' (fromList x) (fromList y) +-- >>> f "..." "abc" +-- "a.b.c." +-- >>> f "..." "ab" +-- "a.b." +-- +-- Currently broken, generates an additional element at the end: +-- +-- >> f ".." "abc" +-- "a.b." +-- +{-# INLINE interleaveEndBy' #-} +interleaveEndBy' :: StreamK m a -> StreamK m a -> StreamK m a +interleaveEndBy' m2 m1 = mkStream $ \st yld _ stp -> do let stop = stp -- "single a" is defined as "yld a (interleaveMin m2 nil)" instead of -- "sng a" to keep the behaviour consistent with the yield -- continuation. 
- single a = yld a (interleaveMin m2 nil) - yieldk a r = yld a (interleaveMin m2 r) + single a = yld a (interleaveEndBy' nil m2) + yieldk a r = yld a (interleaveEndBy' r m2) foldStream st yieldk single stop m1 +infixr 6 `interleaveMin` + +{-# DEPRECATED interleaveMin "Please use flip interleaveEndBy' instead." #-} +{-# INLINE interleaveMin #-} +interleaveMin :: StreamK m a -> StreamK m a -> StreamK m a +interleaveMin = flip interleaveEndBy' + ------------------------------------------------------------------------------- -- Generation ------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Data/Unfold/Type.hs b/core/src/Streamly/Internal/Data/Unfold/Type.hs index 9c165324b4..e56e4c30a4 100644 --- a/core/src/Streamly/Internal/Data/Unfold/Type.hs +++ b/core/src/Streamly/Internal/Data/Unfold/Type.hs @@ -26,7 +26,7 @@ -- much less efficient when compared to combinators using 'Unfold'. For -- example, the 'Streamly.Data.Stream.concatMap' combinator which uses @a -> t m b@ -- (where @t@ is a stream type) to generate streams is much less efficient --- compared to 'Streamly.Data.Stream.unfoldMany'. +-- compared to 'Streamly.Data.Stream.unfoldEach'. -- -- On the other hand, transformation operations on stream types are as -- efficient as transformations on 'Unfold'. @@ -62,6 +62,7 @@ module Streamly.Internal.Data.Unfold.Type -- * From Containers , fromList + , fromTuple -- * Transformations , lmap @@ -81,10 +82,10 @@ module Streamly.Internal.Data.Unfold.Type -- * Nesting , ConcatState (..) 
- , many - , many2 - , manyInterleave - -- , manyInterleave2 + , unfoldEach + , unfoldEach2 + , unfoldEachInterleave + -- , unfoldEachInterleave2 -- Applicative , crossApplySnd @@ -101,9 +102,15 @@ module Streamly.Internal.Data.Unfold.Type , zipWithM , zipWith + + -- * Deprecated + , many + , many2 + , manyInterleave ) where +#include "deprecation.h" #include "inline.hs" -- import Control.Arrow (Arrow(..)) @@ -146,13 +153,13 @@ import Prelude hiding (map, mapM, concatMap, zipWith, takeWhile) -- -- This allows an important optimization to occur in several cases, making the -- 'Unfold' a more efficient abstraction. Consider the 'concatMap' and --- 'unfoldMany' operations, the latter is more efficient. 'concatMap' +-- 'unfoldEach' operations, the latter is more efficient. 'concatMap' -- generates a new stream object from each element in the stream by applying -- the supplied function to the element, the stream object includes the "step" -- function as well as the initial "state" of the stream. Since the stream is -- generated dynamically the compiler does not know the step function or the -- state type statically at compile time, therefore, it cannot inline it. On --- the other hand in case of 'unfoldMany' the compiler has visibility into +-- the other hand in case of 'unfoldEach' the compiler has visibility into -- the unfold's state generation function, therefore, the compiler knows all -- the types statically and it can inline the inject as well as the step -- functions, generating efficient code. Essentially, the stream is not opaque @@ -275,9 +282,9 @@ unfoldr step = unfoldrM (pure . step) -- >>> Unfold.fold Fold.toList u [1..5] -- [2,3,4,5,6] -- --- @ --- lmap f = Unfold.many (Unfold.function f) --- @ +-- Definition: +-- +-- >>> lmap f = Unfold.unfoldEach (Unfold.function f) -- {-# INLINE_NORMAL lmap #-} lmap :: (a -> c) -> Unfold m c b -> Unfold m a b @@ -285,9 +292,9 @@ lmap f (Unfold ustep uinject) = Unfold ustep (uinject Prelude.. 
f) -- | Map an action on the input argument of the 'Unfold'. -- --- @ --- lmapM f = Unfold.many (Unfold.functionM f) --- @ +-- Definition: +-- +-- lmapM f = Unfold.unfoldEach (Unfold.functionM f) -- {-# INLINE_NORMAL lmapM #-} lmapM :: Monad m => (a -> m c) -> Unfold m c b -> Unfold m a b @@ -502,6 +509,21 @@ fromEffect m = Unfold step inject fromPure :: Applicative m => b -> Unfold m a b fromPure = fromEffect Prelude.. pure +data TupleState a = TupleBoth a a | TupleOne a | TupleNone + +-- | Convert a tuple to a 'Stream'. +-- +{-# INLINE_LATE fromTuple #-} +fromTuple :: Applicative m => Unfold m (a,a) a +fromTuple = Unfold step (\(x,y) -> pure $ TupleBoth x y) + + where + + {-# INLINE_LATE step #-} + step (TupleBoth x y) = pure $ Yield x (TupleOne y) + step (TupleOne y) = pure $ Yield y TupleNone + step TupleNone = pure Stop + -- XXX Check if "unfold (fromList [1..10])" fuses, if it doesn't we can use -- rewrite rules to rewrite list enumerations to unfold enumerations. @@ -538,9 +560,10 @@ crossApplyFst (Unfold _step1 _inject1) (Unfold _step2 _inject2) = undefined {-# ANN type Many2State Fuse #-} data Many2State x s1 s2 = Many2Outer x s1 | Many2Inner x s1 s2 -{-# INLINE_NORMAL many2 #-} -many2 :: Monad m => Unfold m (a, b) c -> Unfold m a b -> Unfold m a c -many2 (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject +{-# INLINE_NORMAL unfoldEach2 #-} +unfoldEach2, many2 :: Monad m => + Unfold m (a, b) c -> Unfold m a b -> Unfold m a c +unfoldEach2 (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject where @@ -565,6 +588,8 @@ many2 (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject Skip s -> Skip (Many2Inner a ost s) Stop -> Skip (Many2Outer a ost) +RENAME(many2,unfoldEach2) + data Cross a s1 b s2 = CrossOuter a s1 | CrossInner a s1 b s2 -- | Create a cross product (vector product or cartesian product) of the @@ -792,12 +817,12 @@ data ConcatState s1 s2 = ConcatOuter s1 | ConcatInner s1 s2 -- | Apply the first unfold to 
each output element of the second unfold and -- flatten the output in a single stream. -- --- >>> many u = Unfold.many2 (Unfold.lmap snd u) +-- >>> unfoldEach u = Unfold.unfoldEach2 (Unfold.lmap snd u) -- -{-# INLINE_NORMAL many #-} -many :: Monad m => Unfold m b c -> Unfold m a b -> Unfold m a c +{-# INLINE_NORMAL unfoldEach #-} +unfoldEach, many :: Monad m => Unfold m b c -> Unfold m a b -> Unfold m a c -- many u1 = many2 (lmap snd u1) -many (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject +unfoldEach (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject where @@ -822,6 +847,8 @@ many (Unfold step2 inject2) (Unfold step1 inject1) = Unfold step inject Skip s -> Skip (ConcatInner ost s) Stop -> Skip (ConcatOuter ost) +RENAME(many,unfoldEach) + {- -- XXX There are multiple possible ways to combine the unfolds, "many" appends -- them, we could also have other variants of "many" e.g. manyInterleave. @@ -939,7 +966,7 @@ data ManyInterleaveState o i = | ManyInterleaveInnerL [i] [i] | ManyInterleaveInnerR [i] [i] --- | 'Streamly.Internal.Data.Stream.unfoldManyInterleave' for +-- | 'Streamly.Internal.Data.Stream.unfoldEachInterleave' for -- documentation and notes. -- -- This is almost identical to unfoldManyInterleave in StreamD module. @@ -947,9 +974,10 @@ data ManyInterleaveState o i = -- The 'many' combinator is in fact 'manyAppend' to be more explicit in naming. 
-- -- /Internal/ -{-# INLINE_NORMAL manyInterleave #-} -manyInterleave :: Monad m => Unfold m a b -> Unfold m c a -> Unfold m c b -manyInterleave (Unfold istep iinject) (Unfold ostep oinject) = +{-# INLINE_NORMAL unfoldEachInterleave #-} +unfoldEachInterleave, manyInterleave :: Monad m => + Unfold m a b -> Unfold m c a -> Unfold m c b +unfoldEachInterleave (Unfold istep iinject) (Unfold ostep oinject) = Unfold step inject where @@ -997,3 +1025,5 @@ manyInterleave (Unfold istep iinject) (Unfold ostep oinject) = Yield x s -> Yield x (ManyInterleaveInnerR (s:ls) rs) Skip s -> Skip (ManyInterleaveInnerR ls (s:rs)) Stop -> Skip (ManyInterleaveInnerR ls rs) + +RENAME(manyInterleave,unfoldEachInterleave) diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs index f91ffb1be9..751f064f2e 100644 --- a/core/src/Streamly/Internal/FileSystem/DirIO.hs +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -350,7 +350,7 @@ readEitherChunks dirs = -- XXX Need to use a take to limit the group size. There will be separate -- limits for dir and files groups. S.groupsWhile grouper collector - $ S.unfoldMany eitherReaderPaths + $ S.unfoldEach eitherReaderPaths $ S.fromList dirs where diff --git a/core/src/Streamly/Internal/FileSystem/File.hs b/core/src/Streamly/Internal/FileSystem/File.hs index 3d878a6e15..791577c649 100644 --- a/core/src/Streamly/Internal/FileSystem/File.hs +++ b/core/src/Streamly/Internal/FileSystem/File.hs @@ -336,7 +336,7 @@ readWithBufferOf = readerWith -- /Pre-release/ {-# INLINE reader #-} reader :: (MonadIO m, MonadCatch m) => Unfold m FilePath Word8 -reader = UF.many A.reader (usingFile FH.chunkReader) +reader = UF.unfoldEach A.reader (usingFile FH.chunkReader) -- | Generate a stream of bytes from a file specified by path. The stream ends -- when EOF is encountered. 
File is locked using multiple reader and single diff --git a/core/src/Streamly/Internal/FileSystem/Handle.hs b/core/src/Streamly/Internal/FileSystem/Handle.hs index 1746456e7d..899a7d8e4c 100644 --- a/core/src/Streamly/Internal/FileSystem/Handle.hs +++ b/core/src/Streamly/Internal/FileSystem/Handle.hs @@ -320,11 +320,11 @@ chunkReader = UF.first defaultChunkSize chunkReaderWith -- | Unfolds the tuple @(bufsize, handle)@ into a byte stream, read requests -- to the IO device are performed using buffers of @bufsize@. -- --- >>> readerWith = Unfold.many Array.reader Handle.chunkReaderWith +-- >>> readerWith = Unfold.unfoldEach Array.reader Handle.chunkReaderWith -- {-# INLINE readerWith #-} readerWith :: MonadIO m => Unfold m (Int, Handle) Word8 -readerWith = UF.many A.reader chunkReaderWith +readerWith = UF.unfoldEach A.reader chunkReaderWith -- | Same as 'readerWith' -- @@ -336,7 +336,7 @@ readWithBufferOf = readerWith -- | @readWith bufsize handle@ reads a byte stream from a file -- handle, reads are performed in chunks of up to @bufsize@. -- --- >>> readWith size h = Stream.unfoldMany Array.reader $ Handle.readChunksWith size h +-- >>> readWith size h = Stream.unfoldEach Array.reader $ Handle.readChunksWith size h -- -- /Pre-release/ {-# INLINE readWith #-} @@ -347,15 +347,15 @@ readWith size h = A.concat $ readChunksWith size h -- performed in sizes of -- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. -- --- >>> reader = Unfold.many Array.reader Handle.chunkReader +-- >>> reader = Unfold.unfoldEach Array.reader Handle.chunkReader -- {-# INLINE reader #-} reader :: MonadIO m => Unfold m Handle Word8 -reader = UF.many A.reader chunkReader +reader = UF.unfoldEach A.reader chunkReader -- | Generate a byte stream from a file 'Handle'. 
-- --- >>> read h = Stream.unfoldMany Array.reader $ Handle.readChunks h +-- >>> read h = Stream.unfoldEach Array.reader $ Handle.readChunks h -- -- /Pre-release/ {-# INLINE read #-} @@ -407,7 +407,7 @@ putChunks h = S.fold (FL.drainMapM (putChunk h)) {-# INLINE putChunksWith #-} putChunksWith :: (MonadIO m, Unbox a) => Int -> Handle -> Stream m (Array a) -> m () -putChunksWith n h xs = putChunks h $ A.compactLE n xs +putChunksWith n h xs = putChunks h $ A.compactMax n xs -- > putBytesWith n h m = Handle.putChunks h $ A.pinnedChunksOf n m @@ -456,7 +456,10 @@ chunkWriter = Refold.drainBy putChunk {-# INLINE writeChunksWith #-} writeChunksWith :: (MonadIO m, Unbox a) => Int -> Handle -> Fold m (Array a) () -writeChunksWith n h = A.lCompactGE n (writeChunks h) +-- writeChunksWith n h = A.lCompactGE n (writeChunks h) +writeChunksWith n h = + FL.postscanl (A.scanCompactMin n) + $ FL.catMaybes (writeChunks h) -- | Same as 'writeChunksWith' -- diff --git a/core/src/Streamly/Internal/Unicode/Stream.hs b/core/src/Streamly/Internal/Unicode/Stream.hs index 41d7e61f32..5eb99aadf7 100644 --- a/core/src/Streamly/Internal/Unicode/Stream.hs +++ b/core/src/Streamly/Internal/Unicode/Stream.hs @@ -18,10 +18,12 @@ module Streamly.Internal.Unicode.Stream -- -- $setup + -- XXX Use to/from instead of encode/decode for more compact naming. + -- * Construction (Decoding) decodeLatin1 - -- ** UTF-8 Decoding + -- ** UTF-8 Byte Stream Decoding , CodingFailureMode(..) , writeCharUtf8' , parseCharUtf8With @@ -29,11 +31,11 @@ module Streamly.Internal.Unicode.Stream , decodeUtf8' , decodeUtf8_ - -- ** UTF-16 Decoding + -- ** UTF-16 Byte Stream Decoding , decodeUtf16le' , decodeUtf16le - -- ** Resumable UTF-8 Decoding + -- ** Resumable UTF-8 Byte Stream Decoding , DecodeError(..) 
, DecodeState , CodePoint @@ -44,14 +46,15 @@ module Streamly.Internal.Unicode.Stream , decodeUtf8Chunks , decodeUtf8Chunks' , decodeUtf8Chunks_ + -- , fromUtf8ChunksEndByLn -- * Elimination (Encoding) - -- ** Latin1 Encoding + -- ** Latin1 Encoding to Byte Stream , encodeLatin1 , encodeLatin1' , encodeLatin1_ - -- ** UTF-8 Encoding + -- ** UTF-8 Encoding to Byte Stream , readCharUtf8' , readCharUtf8 , readCharUtf8_ @@ -60,7 +63,18 @@ module Streamly.Internal.Unicode.Stream , encodeUtf8_ , encodeStrings - -- ** UTF-16 Encoding + -- ** UTF-8 Encoding to Chunk Stream + -- , toUtf8Chunks + -- , toUtf8Chunks' + -- , toUtf8Chunks_ + -- , toUtf8ChunksEndByLn + + -- , toPinnedUtf8Chunks + -- , toPinnedUtf8Chunks' + -- , toPinnedUtf8Chunks_ + -- , toPinnedUtf8ChunksEndByLn + + -- ** UTF-16 Encoding to Byte Stream , encodeUtf16le' , encodeUtf16le {- @@ -71,10 +85,10 @@ module Streamly.Internal.Unicode.Stream -- * Transformation , stripHead - , lines - , words - , unlines - , unwords + , lines -- foldLines + , words -- foldWords + , unlines -- unfoldLines + , unwords -- unfoldWords -- * StreamD UTF8 Encoding / Decoding transformations. , decodeUtf8D @@ -1093,7 +1107,7 @@ readCharUtf8' = -- paths (slow path). {-# INLINE_NORMAL encodeUtf8D' #-} encodeUtf8D' :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D' = D.unfoldMany readCharUtf8' +encodeUtf8D' = D.unfoldEach readCharUtf8' -- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When -- any invalid character (U+D800-U+D8FF) is encountered in the input stream the @@ -1112,7 +1126,7 @@ readCharUtf8 = readCharUtf8With $ WCons 239 (WCons 191 (WCons 189 WNil)) -- {-# INLINE_NORMAL encodeUtf8D #-} encodeUtf8D :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D = D.unfoldMany readCharUtf8 +encodeUtf8D = D.unfoldEach readCharUtf8 -- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. 
Any -- Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the @@ -1128,7 +1142,7 @@ readCharUtf8_ = readCharUtf8With WNil {-# INLINE_NORMAL encodeUtf8D_ #-} encodeUtf8D_ :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D_ = D.unfoldMany readCharUtf8_ +encodeUtf8D_ = D.unfoldEach readCharUtf8_ -- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any -- Invalid characters (U+D800-U+D8FF) in the input stream are dropped. @@ -1181,7 +1195,7 @@ readCharUtf16With invalidReplacement = Unfold step inject {-# INLINE encodeUtf16' #-} encodeUtf16' :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16' = D.unfoldMany (readCharUtf16With errString) +encodeUtf16' = D.unfoldEach (readCharUtf16With errString) where errString = error @@ -1190,7 +1204,7 @@ encodeUtf16' = D.unfoldMany (readCharUtf16With errString) {-# INLINE encodeUtf16 #-} encodeUtf16 :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16 = D.unfoldMany (readCharUtf16With WNil) +encodeUtf16 = D.unfoldEach (readCharUtf16With WNil) -- | Similar to 'encodeUtf16le' but throws an error if any invalid character is -- encountered. @@ -1221,6 +1235,8 @@ encodeUtf16le = -- Decoding string literals ------------------------------------------------------------------------------- +-- XXX decodeCString# + -- | Read UTF-8 encoded bytes as chars from an 'Addr#' until a 0 byte is -- encountered, the 0 byte is not included in the stream. -- @@ -1288,7 +1304,7 @@ stripTail = undefined -- | Remove leading whitespace from a string. -- --- > stripHead = Stream.dropWhile isSpace +-- >>> stripHead = Stream.dropWhile Char.isSpace -- -- /Pre-release/ {-# INLINE stripHead #-} @@ -1298,11 +1314,15 @@ stripHead = Stream.dropWhile isSpace -- | Fold each line of the stream using the supplied 'Fold' -- and stream the result. 
-- --- >>> Stream.fold Fold.toList $ Unicode.lines Fold.toList (Stream.fromList "lines\nthis\nstring\n\n\n") --- ["lines","this","string","",""] +-- Definition: -- -- >>> lines f = Stream.foldMany (Fold.takeEndBy_ (== '\n') f) -- +-- Usage: +-- +-- >>> Stream.toList $ Unicode.lines Fold.toList (Stream.fromList "line1\nline2\nline3\n\n\n") +-- ["line1","line2","line3","",""] +-- -- /Pre-release/ {-# INLINE lines #-} lines :: Monad m => Fold m Char b -> Stream m Char -> Stream m b @@ -1326,13 +1346,16 @@ isSpace c where uc = fromIntegral (ord c) :: Word --- | Fold each word of the stream using the supplied 'Fold' --- and stream the result. +-- | Fold each word of the stream using the supplied 'Fold'. +-- +-- Definition: -- --- >>> Stream.fold Fold.toList $ Unicode.words Fold.toList (Stream.fromList "fold these words") --- ["fold","these","words"] +-- >>> words = Stream.wordsBy Char.isSpace -- --- > words = Stream.wordsBy isSpace +-- Usage: +-- +-- >>> Stream.toList $ Unicode.words Fold.toList (Stream.fromList " ab cd ef ") +-- ["ab","cd","ef"] -- -- /Pre-release/ {-# INLINE words #-} @@ -1342,26 +1365,24 @@ words f = D.wordsBy isSpace f -- | Unfold a stream to character streams using the supplied 'Unfold' -- and concat the results suffixing a newline character @\\n@ to each stream. -- --- @ --- unlines = Stream.interposeSuffix '\n' --- unlines = Stream.intercalateSuffix Unfold.fromList "\n" --- @ +-- Definition: +-- +-- >>> unlines = Stream.unfoldEachEndBy '\n' +-- >>> unlines = Stream.unfoldEachEndBySeq "\n" Unfold.fromList -- -- /Pre-release/ {-# INLINE unlines #-} unlines :: MonadIO m => Unfold m a Char -> Stream m a -> Stream m Char -unlines = Stream.interposeSuffix '\n' +unlines = Stream.unfoldEachEndBy '\n' -- | Unfold the elements of a stream to character streams using the supplied -- 'Unfold' and concat the results with a whitespace character infixed between -- the streams. 
-- --- @ --- unwords = Stream.interpose ' ' --- unwords = Stream.intercalate Unfold.fromList " " --- @ +-- >>> unwords = Stream.unfoldEachSepBy ' ' +-- >>> unwords = Stream.unfoldEachSepBySeq " " Unfold.fromList -- -- /Pre-release/ {-# INLINE unwords #-} unwords :: MonadIO m => Unfold m a Char -> Stream m a -> Stream m Char -unwords = Stream.interpose ' ' +unwords = Stream.unfoldEachSepBy ' ' diff --git a/src/Streamly/Internal/Data/Stream/Time.hs b/src/Streamly/Internal/Data/Stream/Time.hs index 9a1187f024..875c21d970 100644 --- a/src/Streamly/Internal/Data/Stream/Time.hs +++ b/src/Streamly/Internal/Data/Stream/Time.hs @@ -27,6 +27,8 @@ module Streamly.Internal.Data.Stream.Time , intervalsOf , boundedIntervalsOf , timedGroupsOf + , timedChunksOf + , timedChunksOf' -- * Sampling , sampleIntervalEnd @@ -64,6 +66,8 @@ import Data.Map (Map) import Data.Maybe (isNothing) import Data.Proxy (Proxy(..)) import Streamly.Data.Fold (Fold) +import Streamly.Data.Array (Unbox) +import Streamly.Internal.Data.Array (Array) import Streamly.Internal.Data.Fold (Fold (..)) import Streamly.Internal.Data.IsMap (IsMap(..)) import Streamly.Internal.Data.Channel.Types (Rate, rate) @@ -78,6 +82,7 @@ import Streamly.Internal.Data.Time.Units import Streamly.Internal.Data.Time.Units (NanoSecond64(..), toRelTime64) import qualified Data.Heap as H +import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Data.Fold as Fold import qualified Streamly.Data.Scanl as Scanl import qualified Streamly.Data.Stream as Stream @@ -290,6 +295,26 @@ groupsOfTimeout :: MonadAsync m => Int -> Double -> Fold m a b -> Stream m a -> Stream m b groupsOfTimeout n timeout = timedGroupsOf timeout n +-- Ideally this should be in an array module, but we do not have one in +-- streamly package. + +-- | Like 'chunksOf' from the Array module but emits the chunk after the +-- timeout even if we have not yet collected the requested size. 
+{-# INLINE timedChunksOf #-} +timedChunksOf :: (MonadAsync m, Unbox a) => + Double -> Int -> Stream m a -> Stream m (Array a) +timedChunksOf timeout n = timedGroupsOf timeout n (Array.unsafeCreateOf n) + +-- | Like 'timedChunksOf' but creates pinned arrays. If the chunks are smaller +-- than LARGE_OBJECT_THRESHOLD then this routine may be useful for better +-- performance if the arrays are to be sent for IO. This will avoid a copy for +-- pinning by the IO routines. +{-# INLINE timedChunksOf' #-} +timedChunksOf' :: (MonadAsync m, Unbox a) => + Double -> Int -> Stream m a -> Stream m (Array a) +timedChunksOf' timeout n = + timedGroupsOf timeout n (Array.unsafePinnedCreateOf n) + ------------------------------------------------------------------------------ -- Windowed classification ------------------------------------------------------------------------------ @@ -575,7 +600,7 @@ classifySessionsByGeneric -> Stream m (Key f, b) -- ^ session key, fold result classifySessionsByGeneric _ tick reset ejectPred tmout (Fold step initial extract final) input = - Stream.unfoldMany (Unfold.lmap sessionOutputStream Unfold.fromStream) + Stream.unfoldEach (Unfold.lmap sessionOutputStream Unfold.fromStream) $ Stream.scanlMAfter' sstep (return szero) (flush final) $ interject (return Nothing) tick $ fmap Just input diff --git a/src/Streamly/Internal/Network/Inet/TCP.hs b/src/Streamly/Internal/Network/Inet/TCP.hs index 16e93d8f23..107dad4939 100644 --- a/src/Streamly/Internal/Network/Inet/TCP.hs +++ b/src/Streamly/Internal/Network/Inet/TCP.hs @@ -346,11 +346,11 @@ withConnection addr port = S.bracketIO (connect addr port) Net.close {-# INLINE reader #-} reader :: (MonadCatch m, MonadAsync m) => Unfold m ((Word8, Word8, Word8, Word8), PortNumber) Word8 -reader = UF.many A.reader (usingConnection ISK.chunkReader) +reader = UF.unfoldEach A.reader (usingConnection ISK.chunkReader) {-# INLINE concatChunks #-} concatChunks :: (Monad m, Unbox a) => Stream m (Array a) -> Stream m a 
-concatChunks = S.unfoldMany A.reader +concatChunks = S.unfoldEach A.reader -- | Read a stream from the supplied IPv4 host address and port number. -- diff --git a/src/Streamly/Internal/Network/Socket.hs b/src/Streamly/Internal/Network/Socket.hs index e4d8a8acd3..cb621d9a89 100644 --- a/src/Streamly/Internal/Network/Socket.hs +++ b/src/Streamly/Internal/Network/Socket.hs @@ -99,7 +99,7 @@ import qualified Streamly.Data.Stream as S import qualified Streamly.Data.Unfold as UF import qualified Streamly.Internal.Data.Array as A ( unsafeFreeze, unsafePinnedAsPtr, pinnedChunksOf, - pinnedCreateOf, unsafePinnedCreateOf, lCompactGE ) + pinnedCreateOf, unsafePinnedCreateOf, scanCompactMin ) import qualified Streamly.Internal.Data.MutArray as MArray (unsafePinnedCreateUsingPtr) import qualified Streamly.Internal.Data.Stream as S (fromStreamK, Stream(..), Step(..)) @@ -398,7 +398,7 @@ chunkReader = UF.first defaultChunkSize chunkReaderWith {-# INLINE concatChunks #-} concatChunks :: (Monad m, Unbox a) => Stream m (Array a) -> Stream m a -concatChunks = S.unfoldMany A.reader +concatChunks = S.unfoldEach A.reader -- | Generate a byte stream from a socket using a buffer of the given size. -- @@ -421,7 +421,7 @@ read = readWith defaultChunkSize -- {-# INLINE readerWith #-} readerWith :: MonadIO m => Unfold m (Int, Socket) Word8 -readerWith = UF.many A.reader chunkReaderWith +readerWith = UF.unfoldEach A.reader chunkReaderWith -- | Same as 'readWith' -- @@ -465,7 +465,10 @@ writeChunks h = FL.drainMapM (liftIO . 
putChunk h) {-# INLINE writeChunksWith #-} writeChunksWith :: (MonadIO m, Unbox a) => Int -> Socket -> Fold m (Array a) () -writeChunksWith n h = A.lCompactGE n (writeChunks h) +-- writeChunksWith n h = A.lCompactBySizeGE n (writeChunks h) +writeChunksWith n h = + FL.postscanl (A.scanCompactMin n) + $ FL.catMaybes (writeChunks h) -- | Same as 'writeChunksWith' -- diff --git a/test/Streamly/Test/Data/Array/Common.hs b/test/Streamly/Test/Data/Array/Common.hs index 0f1a978eeb..5c1866a639 100644 --- a/test/Streamly/Test/Data/Array/Common.hs +++ b/test/Streamly/Test/Data/Array/Common.hs @@ -81,7 +81,7 @@ foldManyWith f = monadicIO $ do xs <- run $ S.fold Fold.toList - $ S.unfoldMany A.reader + $ S.unfoldEach A.reader $ S.foldMany (f 240) $ S.fromList list assert (xs == list) diff --git a/test/Streamly/Test/Data/Parser.hs b/test/Streamly/Test/Data/Parser.hs index ec1452dd9f..f5b08a69a4 100644 --- a/test/Streamly/Test/Data/Parser.hs +++ b/test/Streamly/Test/Data/Parser.hs @@ -392,7 +392,7 @@ takeStartBy = Left _ -> property False where predicate = odd - parser = P.takeStartBy predicate FL.toList + parser = P.takeBeginBy predicate FL.toList takeWhile :: Property takeWhile = @@ -864,7 +864,7 @@ parseUnfold = do Producer.simplify (Producer.parseManyD parser readSrc) xs <- run $ S.toList - $ S.unfoldMany Unfold.fromList + $ S.unfoldEach Unfold.fromList $ S.catRights $ S.unfold streamParser src @@ -1272,7 +1272,7 @@ takeStartBy_ = Left err -> property (displayException err == msg) where predicate = odd - parser = P.takeStartBy_ predicate FL.toList + parser = P.takeBeginBy_ predicate FL.toList quotedWordTest :: String -> [String] -> IO () quotedWordTest inp expected = do diff --git a/test/Streamly/Test/Data/ParserK.hs b/test/Streamly/Test/Data/ParserK.hs index f1a372366b..1230640385 100644 --- a/test/Streamly/Test/Data/ParserK.hs +++ b/test/Streamly/Test/Data/ParserK.hs @@ -650,7 +650,7 @@ parseMany = forAll (listOf (vectorOf len (chooseAny :: Gen Int))) $ \ ins -> monadicIO $ 
do outs <- - (toList $ S.catRights $ S.parseManyD + (toList $ S.catRights $ S.parseMany (P.fromFold $ FL.take len FL.toList) (S.fromList $ concat ins) ) return $ outs == ins @@ -678,7 +678,7 @@ parseUnfold = do Producer.simplify (Producer.parseManyD parser readSrc) xs <- run $ toList - $ S.unfoldMany Unfold.fromList + $ S.unfoldEach Unfold.fromList $ S.catRights $ S.unfold streamParser src @@ -749,7 +749,7 @@ parseMany2Events = ( run $ toList $ S.catRights - $ S.parseManyD readOneEvent + $ S.parseMany readOneEvent $ S.fromList (concat (replicate 2 event)) ) assert (length xs == 2) diff --git a/test/Streamly/Test/Data/Stream.hs b/test/Streamly/Test/Data/Stream.hs index 573d75d270..c19338c565 100644 --- a/test/Streamly/Test/Data/Stream.hs +++ b/test/Streamly/Test/Data/Stream.hs @@ -59,7 +59,7 @@ toList = Stream.toList -- XXX Where are the tests for "takeEndBy"? splitOn :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitOn predicate f = Stream.foldMany1 (Fold.takeEndBy_ predicate f) +splitOn predicate f = Stream.foldManyPost (Fold.takeEndBy_ predicate f) splitOnSuffix :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b @@ -68,11 +68,11 @@ splitOnSuffix predicate f = Stream.foldMany (Fold.takeEndBy_ predicate f) -- XXX Where are the tests for "takeEndBySeq"? 
splitOnSeqFold :: (MonadIO m, Unbox a, Enum a, Eq a) => Array.Array a -> Fold m a b -> Stream m a -> Stream m b -splitOnSeqFold patt f = Stream.foldMany1 (Fold.takeEndBySeq_ patt f) +splitOnSeqFold patt f = Stream.foldManyPost (Fold.takeEndBySeq_ patt f) splitOnSeqStream :: (MonadIO m, Unbox a, Enum a, Eq a) => Array.Array a -> Fold m a b -> Stream m a -> Stream m b -splitOnSeqStream = Stream.splitOnSeq +splitOnSeqStream = Stream.splitSepBySeq_ splitOnSuffixSeqFold :: (MonadIO m, Unbox a, Enum a, Eq a) => Array.Array a -> Fold m a b -> Stream m a -> Stream m b @@ -222,7 +222,7 @@ intercalateSplitEqId sep splitter lIntercalater sIntercalater i = ys <- splitter xs (replicate i sep) szs <- toList - $ sIntercalater Unfold.fromList (replicate i sep) + $ sIntercalater (replicate i sep) Unfold.fromList $ Stream.fromList ys let lzs = lIntercalater (replicate i sep) ys listEquals (==) szs xs @@ -244,7 +244,7 @@ intercalateSplitEqIdNoSepEnd sep splitter lIntercalater sIntercalater i = ys <- splitter xs (replicate i sep) szs <- toList - $ sIntercalater Unfold.fromList (replicate i sep) + $ sIntercalater (replicate i sep) Unfold.fromList $ Stream.fromList ys let lzs = lIntercalater (replicate i sep) ys listEquals (==) szs xs @@ -266,7 +266,7 @@ concatSplitIntercalateEqConcat sep splitter lIntercalater sIntercalater i = lys <- splitter lxs (replicate i sep) sxs <- toList - $ sIntercalater Unfold.fromList (replicate i sep) + $ sIntercalater (replicate i sep) Unfold.fromList $ Stream.fromList xss sys <- splitter sxs (replicate i sep) listEquals (==) (concat lys) (concat xss) @@ -286,7 +286,9 @@ splitIntercalateEqId sep splitter lIntercalater sIntercalater = testCase xss = do let lxs = lIntercalater [sep] xss lys <- splitter lxs [sep] - sxs <- toList $ sIntercalater Unfold.fromList [sep] $ Stream.fromList xss + sxs <- toList + $ sIntercalater [sep] Unfold.fromList + $ Stream.fromList xss sys <- splitter sxs [sep] listEquals (==) lys xss listEquals (==) sys xss @@ -300,26 +302,28 
@@ splitterProperties sep desc = do describe (desc <> " splitOn") $ do - intercalateSplitEqId sep splitOn_ intercalate Stream.intercalate 1 + intercalateSplitEqId + sep splitOn_ intercalate Stream.unfoldEachSepBySeq 1 concatSplitIntercalateEqConcat - sep splitOn_ intercalate Stream.intercalate 1 + sep splitOn_ intercalate Stream.unfoldEachSepBySeq 1 -- Exclusive case - splitIntercalateEqId sep splitOn_ intercalate Stream.intercalate + splitIntercalateEqId + sep splitOn_ intercalate Stream.unfoldEachSepBySeq describe (desc <> " splitOnSuffix") $ do intercalateSplitEqIdNoSepEnd - sep splitOnSuffix_ intercalate Stream.intercalate 1 + sep splitOnSuffix_ intercalate Stream.unfoldEachSepBySeq 1 concatSplitIntercalateEqConcat - sep splitOnSuffix_ intercalateSuffix Stream.intercalateSuffix 1 + sep splitOnSuffix_ intercalateSuffix Stream.unfoldEachEndBySeq 1 -- Exclusive case splitIntercalateEqId - sep splitOnSuffix_ intercalateSuffix Stream.intercalateSuffix + sep splitOnSuffix_ intercalateSuffix Stream.unfoldEachEndBySeq where @@ -358,27 +362,27 @@ seqSplitterProperties sep desc = do splitOnSeqWith op = do forM_ [0, 1, 2, 4] - $ intercalateSplitEqId sep op intercalate Stream.intercalate + $ intercalateSplitEqId sep op intercalate Stream.unfoldEachSepBySeq forM_ [0, 1, 2, 4] $ concatSplitIntercalateEqConcat - sep op intercalate Stream.intercalate + sep op intercalate Stream.unfoldEachSepBySeq -- Exclusive case - splitIntercalateEqId sep op intercalate Stream.intercalate + splitIntercalateEqId sep op intercalate Stream.unfoldEachSepBySeq splitOnSuffixSeqWith op = do forM_ [0, 1, 2, 4] $ intercalateSplitEqIdNoSepEnd - sep op intercalate Stream.intercalate + sep op intercalate Stream.unfoldEachSepBySeq forM_ [0, 1, 2, 4] $ concatSplitIntercalateEqConcat - sep op intercalateSuffix Stream.intercalateSuffix + sep op intercalateSuffix Stream.unfoldEachEndBySeq -- Exclusive case splitIntercalateEqId - sep op intercalateSuffix Stream.intercalateSuffix + sep op intercalateSuffix 
Stream.unfoldEachEndBySeq intercalateSplitOnId :: forall a. (Arbitrary a, Eq a, Show a, Num a) => diff --git a/test/Streamly/Test/Data/Unfold.hs b/test/Streamly/Test/Data/Unfold.hs index e0acac4405..875087f65f 100644 --- a/test/Streamly/Test/Data/Unfold.hs +++ b/test/Streamly/Test/Data/Unfold.hs @@ -549,7 +549,7 @@ concat :: Bool concat = let unfIn = UF.replicateM unfOut = UF.map ((10,) . return) UF.enumerateFromToIntegral - unf = UF.many unfIn unfOut + unf = UF.unfoldEach unfIn unfOut lst = Prelude.concat $ Prelude.map (Prelude.replicate 10) [1 .. 10] in testUnfoldD unf (1, 10) lst