Skip to content

Commit 2feb2db

Browse files
committed
Introduce Tape
1 parent 0128835 commit 2feb2db

File tree

6 files changed

+335
-29
lines changed

6 files changed

+335
-29
lines changed

Data/Rakhana.hs

+42-12
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,23 @@
1414
module Data.Rakhana where
1515

1616
--------------------------------------------------------------------------------
17-
import Data.ByteString.Lazy (ByteString)
18-
import Data.Typeable
17+
import Prelude hiding (null)
18+
import qualified Data.ByteString.Lazy as L
19+
import Data.Typeable
1920

2021
--------------------------------------------------------------------------------
22+
import Control.Applicative ((<|>))
2123
import Control.Monad.Catch (Exception, MonadThrow(..))
2224
import Control.Monad.Trans (lift)
25+
import Data.Attoparsec.ByteString.Char8 (skipSpace)
2326
import Data.Attoparsec.ByteString.Lazy
2427
import Pipes
28+
import Pipes.Core
2529

2630
--------------------------------------------------------------------------------
2731
import Data.Rakhana.Internal.Parsers
2832
import Data.Rakhana.Internal.Types
33+
import Data.Rakhana.Tape
2934

3035
--------------------------------------------------------------------------------
3136
data RakhanaParserException
@@ -36,21 +41,46 @@ data RakhanaParserException
3641
instance Exception RakhanaParserException
3742

3843
--------------------------------------------------------------------------------
39-
makeDocument :: MonadThrow m => ByteString -> m Document
44+
makeDocument :: MonadThrow m => L.ByteString -> m Document
4045
makeDocument start
4146
= case parse parseHeader start of
4247
Fail _ ctx e
4348
-> throwM $ RakhanaParserException ctx e
44-
Done rest ver
45-
-> let doc = Document ver (objectProducer rest) in
49+
Done rest header
50+
-> let doc = Document header (objectProducer rest) in
4651
return doc
4752

4853
--------------------------------------------------------------------------------
49-
objectProducer :: MonadThrow m => ByteString -> Producer' Structure m ()
54+
objectProducer :: MonadThrow m => L.ByteString -> Producer' Structure m ()
5055
objectProducer bytes
51-
= case parse parseIndirectObject bytes of
52-
Fail _ ctx e
53-
-> lift $ throwM $ RakhanaParserException ctx e
54-
Done rest iObj
55-
-> do yield $ IndObj iObj
56-
objectProducer rest
56+
= case parse skipSpace bytes of
57+
Done cl _
58+
| L.null cl -> return ()
59+
| otherwise
60+
-> case parse parser bytes of
61+
Fail r ctx e
62+
-> lift $ throwM $ RakhanaParserException ctx e
63+
Done rest struct
64+
-> do yield struct
65+
objectProducer rest
66+
where
67+
parser = parseIndirectObject <|> parseXRef
68+
69+
--------------------------------------------------------------------------------
70+
fooBytes :: L.ByteString -> L.ByteString
71+
fooBytes input
72+
= case parse parseHeader input of
73+
Done rest _ -> L.take 30 rest
74+
Fail rest _ e -> L.take 30 rest
75+
76+
--------------------------------------------------------------------------------
77+
driveTest :: Drive ()
78+
driveTest
79+
= do driveBottom
80+
driveDirection Backward
81+
bs <- driveGet 30
82+
liftIO $ print bs
83+
84+
--------------------------------------------------------------------------------
85+
app :: IO ()
86+
app = runDrive (fileTape "samples/IdiomLite.pdf") driveTest

Data/Rakhana/Internal/Parsers.hs

+74-12
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,40 @@ module Data.Rakhana.Internal.Parsers where
1616
--------------------------------------------------------------------------------
1717
import Prelude hiding (take)
1818
import Control.Applicative ((<$), (<|>), many)
19-
import Control.Monad (MonadPlus, mzero)
19+
import Control.Monad (MonadPlus, mzero, when)
2020
import Data.ByteString (ByteString)
2121
import qualified Data.ByteString as B
2222
import qualified Data.ByteString.Char8 as B8
2323
import Data.Char (digitToInt, isDigit, isHexDigit)
2424

2525
--------------------------------------------------------------------------------
2626
import Data.Attoparsec.ByteString.Lazy (Parser)
27+
import qualified Data.Attoparsec.ByteString as AB
2728
import Data.Attoparsec.ByteString.Char8 hiding (isDigit)
2829
import Data.Scientific (floatingOrInteger)
2930

3031
--------------------------------------------------------------------------------
3132
import Data.Rakhana.Internal.Types
3233

3334
--------------------------------------------------------------------------------
34-
parseHeader :: Parser (Int, Int)
35+
parseHeader :: Parser Header
3536
parseHeader
3637
= do _ <- string "%PDF-"
3738
maj <- decimal
3839
_ <- char '.'
3940
min <- decimal
40-
return (maj, min)
41+
skipSpace
42+
skipComment
43+
--bin <- parseBinary -- <|> return False
44+
return $ makeHeader maj min True -- bin
45+
where
46+
parseBinary
47+
= do _ <- char '%'
48+
bs <- take 12
49+
--when (B.any (< 128) bs) $
50+
-- fail "doesn't contain binary data"
51+
-- endOfLine
52+
return True
4153

4254
--------------------------------------------------------------------------------
4355
startXRef :: Parser Int
@@ -59,6 +71,17 @@ tableXRef
5971
= do _ <- string "xref"
6072
pdfEndOfLine
6173

74+
--------------------------------------------------------------------------------
75+
parseXRef :: Parser Structure
76+
parseXRef
77+
= do skipSpace
78+
tableXRef
79+
h <- parseSubsectionHeader
80+
es <- parseTableEntries
81+
t <- parseTrailerAfterTable
82+
i <- startXRef
83+
return $ XRef $ makeXRefTable h es t i
84+
6285
--------------------------------------------------------------------------------
6386
parseSubsectionHeader :: Parser (Int, Int)
6487
parseSubsectionHeader
@@ -71,23 +94,29 @@ parseSubsectionHeader
7194
--------------------------------------------------------------------------------
7295
parseTrailerAfterTable :: Parser Dictionary
7396
parseTrailerAfterTable
74-
= do _ <- string "trailer"
97+
= do skipSpace
98+
_ <- string "trailer"
7599
pdfEndOfLine
76100
skipSpace
77101
Dict d <- parseDict
78102
return d
79103

80104
--------------------------------------------------------------------------------
81-
parseTableEntry :: Parser (Int, Int, Bool)
105+
parseTableEntries :: Parser [TableEntry]
106+
parseTableEntries = many1 parseTableEntry
107+
108+
--------------------------------------------------------------------------------
109+
parseTableEntry :: Parser TableEntry
82110
parseTableEntry
83-
= do offset <- decimal
111+
= do skipSpace
112+
offset <- decimal
84113
skipSpace
85114
gen <- decimal
86115
skipSpace
87116
c <- anyChar
88117
case c of
89-
'n' -> return (offset, gen, False)
90-
'f' -> return (offset, gen, True)
118+
'n' -> return $ makeTableEntry offset gen False
119+
'f' -> return $ makeTableEntry offset gen True
91120
_ ->
92121
let msg = "error parsing XRef table entry: unknown char: " ++
93122
[c] in
@@ -238,7 +267,7 @@ parseStreamBytes len
238267
return bytes
239268

240269
--------------------------------------------------------------------------------
241-
parseIndirectObject :: Parser IndirectObject
270+
parseIndirectObject :: Parser Structure
242271
parseIndirectObject
243272
= do skipSpace
244273
idx <- decimal
@@ -251,15 +280,16 @@ parseIndirectObject
251280
obj <- parseObject
252281
case obj of
253282
Dict d ->
254-
do let iobj = makeIndObj idx gen obj
283+
do let iobj = IndObj $ makeIndObj idx gen obj
255284
stream
256285
= do v <- lookupM "Length" d
257286
len <- natural v
258287
bs <- parseStreamBytes len
259-
let idobj = makeIndObj idx gen (Stream d bs)
288+
let idobj = IndObj $ makeIndObj idx gen
289+
(Stream d bs)
260290
return idobj
261291
stream <|> (parseEndOfObject >> return iobj)
262-
_ -> return $ makeIndObj idx gen obj
292+
_ -> return $ IndObj $ makeIndObj idx gen obj
263293

264294
--------------------------------------------------------------------------------
265295
makeIndObj :: Int -> Int -> Object -> IndirectObject
@@ -270,6 +300,38 @@ makeIndObj idx gen obj
270300
, indObject = obj
271301
}
272302

303+
--------------------------------------------------------------------------------
304+
makeXRefTable :: (Int, Int)
305+
-> [TableEntry]
306+
-> Dictionary
307+
-> Int
308+
-> XRefTable
309+
makeXRefTable header entries dict start
310+
= XRefTable
311+
{ xrefHeader = header
312+
, xrefEntries = entries
313+
, xrefTrailer = dict
314+
, xrefStart = start
315+
}
316+
317+
--------------------------------------------------------------------------------
318+
makeTableEntry :: Int -> Int -> Bool -> TableEntry
319+
makeTableEntry offset gen used
320+
= TableEntry
321+
{ tableEntryOffset = offset
322+
, tableEntryGeneration = gen
323+
, tableEntryUsed = used
324+
}
325+
326+
--------------------------------------------------------------------------------
327+
makeHeader :: Int -> Int -> Bool -> Header
328+
makeHeader mj mi b
329+
= Header
330+
{ headerMaj = mj
331+
, headerMin = mi
332+
, headerBinary = b
333+
}
334+
273335
--------------------------------------------------------------------------------
274336
parseTillStreamData :: Parser ()
275337
parseTillStreamData

Data/Rakhana/Internal/Types.hs

+31-3
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,43 @@ data IndirectObject
5353
}
5454
deriving (Eq, Show)
5555

56+
--------------------------------------------------------------------------------
57+
data TableEntry
58+
= TableEntry
59+
{ tableEntryOffset :: !Int
60+
, tableEntryGeneration :: !Int
61+
, tableEntryUsed :: !Bool
62+
}
63+
deriving Show
64+
65+
--------------------------------------------------------------------------------
66+
data XRefTable
67+
= XRefTable
68+
{ xrefHeader :: !(Int, Int)
69+
, xrefEntries :: ![TableEntry]
70+
, xrefTrailer :: !Dictionary
71+
, xrefStart :: !Int
72+
}
73+
deriving Show
74+
75+
--------------------------------------------------------------------------------
76+
data Header
77+
= Header
78+
{ headerMaj :: !Int
79+
, headerMin :: !Int
80+
, headerBinary :: !Bool
81+
}
82+
deriving Show
83+
5684
--------------------------------------------------------------------------------
5785
data Structure
5886
= IndObj IndirectObject
59-
| XRef
60-
deriving (Eq, Show)
87+
| XRef XRefTable
88+
deriving Show
6189

6290
--------------------------------------------------------------------------------
6391
data Document
6492
= Document
65-
{ documentVersion :: (Int, Int)
93+
{ documentHeader :: Header
6694
, documentObjects :: forall m. MonadThrow m => Producer' Structure m ()
6795
}

0 commit comments

Comments
 (0)