Skip to content

Commit

Permalink
Merge pull request #7 from luispedro/bzip2_chunked_bug
Browse files Browse the repository at this point in the history
BUG REPORT: Add buggy test case (files created by pbzip2)
  • Loading branch information
snoyberg committed Aug 5, 2019
2 parents 5ae7402 + 0f20d01 commit 584fad9
Show file tree
Hide file tree
Showing 7 changed files with 11,130 additions and 6 deletions.
3 changes: 3 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 0.3.0.2
* Fix handling of concatenated bzip2 files

## 0.3.0.1

* Reduce dep to `data-default-class` [#6](https://github.com/snoyberg/bzlib-conduit/pull/6)
Expand Down
42 changes: 37 additions & 5 deletions src/Data/Conduit/BZlib.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE RecordWildCards #-}
module Data.Conduit.BZlib (
compress,
decompress1,
decompress,

bzip2,
Expand Down Expand Up @@ -131,6 +132,7 @@ compress CompressParams {..} = do
yields ptr c'BZ_FINISH
loop
where
yields :: MonadIO m => Ptr C'bz_stream -> CInt -> ConduitT S.ByteString S.ByteString m ()
yields ptr action = do
cont <- liftIO $ throwIfMinus "bzCompress" $ c'BZ2_bzCompress ptr action
mbout <- liftIO $ getAvailOut ptr
Expand All @@ -140,12 +142,14 @@ compress CompressParams {..} = do
when (availIn > 0 || action == c'BZ_FINISH && cont /= c'BZ_STREAM_END) $
yields ptr action

-- | Decompress a stream of ByteStrings.
decompress
-- | Decompress a stream of ByteStrings. Note that this will only decompress
-- the first compressed stream in the input and leave the rest for further
-- processing. See 'decompress'.
decompress1
:: MonadResource m
=> DecompressParams -- ^ Decompress parameter
-> ConduitT S.ByteString S.ByteString m ()
decompress DecompressParams {..} = do
decompress1 DecompressParams {..} = do
(ptr, inbuf) <- lift $ allocateStream
_ <- lift $ allocate
(throwIfMinus_ "bzDecompressInit" $
Expand All @@ -172,13 +176,41 @@ decompress DecompressParams {..} = do
yield $ fromJust mbout
availIn <- liftIO $ peek $ p'bz_stream'avail_in ptr
if availIn > 0
then yields ptr
then
-- bzip2 files can contain multiple concatenated streams, but the
-- API requires that we close the stream and start a new
-- decompression session.
if ret == c'BZ_STREAM_END
then do
dataIn <- liftIO $ peek $ p'bz_stream'next_in ptr
unread <- liftIO $ S.packCStringLen (dataIn, fromIntegral availIn)
leftover unread
return False
else yields ptr
else return $ ret == c'BZ_OK

-- Decompress all the compressed bzip2 streams in the input, as the bzip2
-- command line tool.
decompress
:: MonadResource m
=> DecompressParams -- ^ Decompress parameter
-> ConduitT S.ByteString S.ByteString m ()
decompress params = do
next <- await
case next of
Nothing -> return ()
Just bs
| S.null bs -> decompress params
| otherwise -> do
leftover bs
decompress1 params
decompress params
-- | bzip2 compression with default parameters.
bzip2 :: MonadResource m => ConduitT S.ByteString S.ByteString m ()
bzip2 = compress def

-- | bzip2 decompression with default parameters.
-- | bzip2 decompression with default parameters. This will decompress all the
-- streams in the input
bunzip2 :: MonadResource m => ConduitT S.ByteString S.ByteString m ()
bunzip2 = decompress def

Binary file added test/sample4.bz2
Binary file not shown.
11,088 changes: 11,088 additions & 0 deletions test/sample4.ref

Large diffs are not rendered by default.

Binary file added test/sample5.bz2
Binary file not shown.
Binary file added test/sample5.ref
Binary file not shown.
3 changes: 2 additions & 1 deletion test/test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ import Prelude as P
main :: IO ()
main = hspec $ do
describe "decompress" $ do
forM_ ["sample1", "sample2", "sample3"] $ \file -> do
forM_ [1..5] $ \n -> do
let file = "sample"++show n
it ("correctly " ++ file ++ ".bz2") $ do
dec <- runConduitRes
$ sourceFile ("test/" ++ file ++ ".bz2")
Expand Down

0 comments on commit 584fad9

Please sign in to comment.