@@ -16,28 +16,40 @@ module Data.Rakhana.Internal.Parsers where
16
16
--------------------------------------------------------------------------------
17
17
import Prelude hiding (take )
18
18
import Control.Applicative ((<$) , (<|>) , many )
19
- import Control.Monad (MonadPlus , mzero )
19
+ import Control.Monad (MonadPlus , mzero , when )
20
20
import Data.ByteString (ByteString )
21
21
import qualified Data.ByteString as B
22
22
import qualified Data.ByteString.Char8 as B8
23
23
import Data.Char (digitToInt , isDigit , isHexDigit )
24
24
25
25
--------------------------------------------------------------------------------
26
26
import Data.Attoparsec.ByteString.Lazy (Parser )
27
+ import qualified Data.Attoparsec.ByteString as AB
27
28
import Data.Attoparsec.ByteString.Char8 hiding (isDigit )
28
29
import Data.Scientific (floatingOrInteger )
29
30
30
31
--------------------------------------------------------------------------------
31
32
import Data.Rakhana.Internal.Types
32
33
33
34
--------------------------------------------------------------------------------
34
-- | Parse the PDF file header, e.g. @%PDF-1.4@.
--
-- Consumes the @%PDF-@ marker, the major version, a dot and the minor
-- version, then skips trailing whitespace and runs 'skipComment'
-- (presumably consuming the comment line that conventionally follows the
-- version -- confirm against 'skipComment').
--
-- NOTE: the binary flag of the resulting 'Header' is currently always
-- 'True'; the content of the comment following the version is not inspected.
parseHeader :: Parser Header
parseHeader
    = do _     <- string "%PDF-"
         major <- decimal
         _     <- char '.'
         minor <- decimal
         skipSpace
         skipComment
         -- TODO: detect the binary marker instead of assuming it is present.
         -- The intended check is that the bytes following the '%' of the
         -- second header line are all >= 128.
         return $ makeHeader major minor True
41
53
42
54
--------------------------------------------------------------------------------
43
55
startXRef :: Parser Int
@@ -59,6 +71,17 @@ tableXRef
59
71
= do _ <- string " xref"
60
72
pdfEndOfLine
61
73
74
+ --------------------------------------------------------------------------------
75
-- | Parse a whole cross-reference section and wrap it as a 'Structure'.
--
-- Runs, in order: optional leading whitespace, 'tableXRef',
-- 'parseSubsectionHeader', 'parseTableEntries', 'parseTrailerAfterTable'
-- and 'startXRef', then assembles the results with 'makeXRefTable'.
parseXRef :: Parser Structure
parseXRef
    = do skipSpace
         tableXRef
         subsection <- parseSubsectionHeader
         entries    <- parseTableEntries
         trailer    <- parseTrailerAfterTable
         start      <- startXRef
         let table = makeXRefTable subsection entries trailer start
         return (XRef table)
84
+
62
85
--------------------------------------------------------------------------------
63
86
parseSubsectionHeader :: Parser (Int , Int )
64
87
parseSubsectionHeader
@@ -71,23 +94,29 @@ parseSubsectionHeader
71
94
--------------------------------------------------------------------------------
72
95
-- | Parse the @trailer@ keyword and return the dictionary that follows it.
--
-- Leading whitespace is skipped before the keyword. The parser fails with
-- an explicit message when the object produced by 'parseDict' is not a
-- 'Dict'.
parseTrailerAfterTable :: Parser Dictionary
parseTrailerAfterTable
    = do skipSpace
         _ <- string "trailer"
         pdfEndOfLine
         skipSpace
         obj <- parseDict
         -- Match explicitly rather than with a failable pattern bind so the
         -- failure carries a meaningful message.
         case obj of
             Dict d -> return d
             _      -> fail "error parsing trailer: expected a dictionary"
79
103
80
104
--------------------------------------------------------------------------------
81
- parseTableEntry :: Parser (Int , Int , Bool )
105
-- | Parse one or more cross-reference table entries with 'parseTableEntry'.
--
-- Fails if not even a single entry can be parsed.
parseTableEntries :: Parser [TableEntry]
parseTableEntries
    = many1 parseTableEntry
107
+
108
+ --------------------------------------------------------------------------------
109
+ parseTableEntry :: Parser TableEntry
82
110
parseTableEntry
83
- = do offset <- decimal
111
+ = do skipSpace
112
+ offset <- decimal
84
113
skipSpace
85
114
gen <- decimal
86
115
skipSpace
87
116
c <- anyChar
88
117
case c of
89
- ' n' -> return ( offset, gen, False )
90
- ' f' -> return ( offset, gen, True )
118
+ ' n' -> return $ makeTableEntry offset gen False
119
+ ' f' -> return $ makeTableEntry offset gen True
91
120
_ ->
92
121
let msg = " error parsing XRef table entry: unknown char: " ++
93
122
[c] in
@@ -238,7 +267,7 @@ parseStreamBytes len
238
267
return bytes
239
268
240
269
--------------------------------------------------------------------------------
241
- parseIndirectObject :: Parser IndirectObject
270
+ parseIndirectObject :: Parser Structure
242
271
parseIndirectObject
243
272
= do skipSpace
244
273
idx <- decimal
@@ -251,15 +280,16 @@ parseIndirectObject
251
280
obj <- parseObject
252
281
case obj of
253
282
Dict d ->
254
- do let iobj = makeIndObj idx gen obj
283
+ do let iobj = IndObj $ makeIndObj idx gen obj
255
284
stream
256
285
= do v <- lookupM " Length" d
257
286
len <- natural v
258
287
bs <- parseStreamBytes len
259
- let idobj = makeIndObj idx gen (Stream d bs)
288
+ let idobj = IndObj $ makeIndObj idx gen
289
+ (Stream d bs)
260
290
return idobj
261
291
stream <|> (parseEndOfObject >> return iobj)
262
- _ -> return $ makeIndObj idx gen obj
292
+ _ -> return $ IndObj $ makeIndObj idx gen obj
263
293
264
294
--------------------------------------------------------------------------------
265
295
makeIndObj :: Int -> Int -> Object -> IndirectObject
@@ -270,6 +300,38 @@ makeIndObj idx gen obj
270
300
, indObject = obj
271
301
}
272
302
303
+ --------------------------------------------------------------------------------
304
-- | Assemble an 'XRefTable' from its already-parsed parts.
makeXRefTable :: (Int, Int)    -- ^ subsection header, stored in 'xrefHeader'
              -> [TableEntry]  -- ^ table entries, stored in 'xrefEntries'
              -> Dictionary    -- ^ trailer dictionary, stored in 'xrefTrailer'
              -> Int           -- ^ value parsed by 'startXRef', stored in 'xrefStart'
              -> XRefTable
makeXRefTable subsection rows trailer startPos
    = XRefTable { xrefHeader  = subsection
                , xrefEntries = rows
                , xrefTrailer = trailer
                , xrefStart   = startPos
                }
316
+
317
+ --------------------------------------------------------------------------------
318
-- | Build a 'TableEntry' from an offset, a generation number and a flag.
--
-- NOTE(review): 'parseTableEntry' passes 'False' for entries marked @n@ and
-- 'True' for entries marked @f@. In a PDF cross-reference table @n@ denotes
-- an in-use entry and @f@ a free one, so the flag stored in
-- 'tableEntryUsed' looks inverted with respect to the field name -- confirm
-- the intended meaning before relying on it.
makeTableEntry :: Int -> Int -> Bool -> TableEntry
makeTableEntry entryOffset generation flag
    = TableEntry { tableEntryOffset     = entryOffset
                 , tableEntryGeneration = generation
                 , tableEntryUsed       = flag
                 }
325
+
326
+ --------------------------------------------------------------------------------
327
-- | Build a 'Header' from the major version, the minor version and the
-- binary flag, in that order.
makeHeader :: Int -> Int -> Bool -> Header
makeHeader major minor binary
    = Header { headerMaj    = major
             , headerMin    = minor
             , headerBinary = binary
             }
334
+
273
335
--------------------------------------------------------------------------------
274
336
parseTillStreamData :: Parser ()
275
337
parseTillStreamData
0 commit comments