From 46c5c7c73fb985400ae79d67e18d022253498b80 Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Sun, 17 Jan 2021 05:50:25 +0900 Subject: [PATCH 1/9] Add OLEFile2.init(FileWrapper) --- .../contents.xcworkspacedata | 7 + Sources/OLEKit/DataStream.swift | 16 +- Sources/OLEKit/DirectoryEntry.swift | 32 ++- Sources/OLEKit/FAT.swift | 2 +- Sources/OLEKit/FileWrapper+Extension.swift | 205 ++++++++++++++++++ Sources/OLEKit/Header.swift | 2 +- Sources/OLEKit/OLEFile2.swift | 90 ++++++++ Tests/OLEKitTests/OLEKitTests.swift | 13 ++ 8 files changed, 356 insertions(+), 11 deletions(-) create mode 100644 .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata create mode 100644 Sources/OLEKit/FileWrapper+Extension.swift create mode 100644 Sources/OLEKit/OLEFile2.swift diff --git a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000..919434a --- /dev/null +++ b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/Sources/OLEKit/DataStream.swift b/Sources/OLEKit/DataStream.swift index c1466f2..4ea170a 100644 --- a/Sources/OLEKit/DataStream.swift +++ b/Sources/OLEKit/DataStream.swift @@ -52,7 +52,7 @@ public final class DataReader: Reader { precondition(byteOffset + 1 <= data.count) defer { byteOffset += 1 } - return data[byteOffset] + return data[data.startIndex + byteOffset] } /// Read two bytes in little-endian order as a single `UInt16` value and @@ -61,7 +61,7 @@ public final class DataReader: Reader { precondition(byteOffset + 2 <= data.count) defer { byteOffset += 2 } - return (UInt16(data[byteOffset + 1]) << 8) + UInt16(data[byteOffset]) + return (UInt16(data[data.startIndex + byteOffset + 1]) << 8) + UInt16(data[data.startIndex + byteOffset]) } /// Read four bytes in little-endian order as a single `UInt32` value and @@ -70,10 +70,10 @@ public final class DataReader: Reader { precondition(byteOffset + 4 <= data.count) defer { byteOffset += 4 } - return (UInt32(data[byteOffset + 3]) << 24) - + (UInt32(data[byteOffset + 2]) << 16) - + (UInt32(data[byteOffset + 1]) << 8) - + UInt32(data[byteOffset]) + return (UInt32(data[data.startIndex + byteOffset + 3]) << 24) + + (UInt32(data[data.startIndex + byteOffset + 2]) << 16) + + (UInt32(data[data.startIndex + byteOffset + 1]) << 8) + + UInt32(data[data.startIndex + byteOffset]) } /// Read a given `count` of bytes as raw data and increment `byteOffset` by `count`. @@ -81,7 +81,7 @@ public final class DataReader: Reader { precondition(byteOffset + length <= data.count) defer { byteOffset += length } - return data[byteOffset.. Data { @@ -89,6 +89,6 @@ public final class DataReader: Reader { defer { byteOffset = data.count - 1 } - return data[byteOffset.. [DirectoryEntry] { try Self.entries(index: 0, at: sectorID, in: fileHandle, header, fat: fat) } + + private static func entries( + index: UInt32, + at sectorID: UInt32, + in fileWrapper: FileWrapper, + _ header: Header, + fat: [UInt32] + ) throws -> [DirectoryEntry] { + var stream = try fileWrapper.oleStream( + sectorID: sectorID, + firstSectorOffset: UInt64(header.sectorSize), + sectorSize: header.sectorSize, + fat: fat + ) + var peers = [DirectoryEntry]() + + if let entry = try DirectoryEntry(&stream, &peers, index: index, sectorSize: header.sectorSize) { + peers.append(entry) + } + return peers + } + + static func entries( + rootAt sectorID: UInt32, + in fileWrapper: FileWrapper, + _ header: Header, + fat: [UInt32] + ) throws -> [DirectoryEntry] { + try Self.entries(index: 0, at: sectorID, in: fileWrapper, header, fat: fat) + } } diff --git a/Sources/OLEKit/FAT.swift b/Sources/OLEKit/FAT.swift index 61117dd..77882a2 100644 --- a/Sources/OLEKit/FAT.swift +++ b/Sources/OLEKit/FAT.swift @@ -36,7 +36,7 @@ enum SectorID: UInt32 { FAT sectors, right after the header which is 76 bytes long. (always 109, whatever the sector size: 512 bytes = 76+4*109) Additional sectors are described by DIFAT blocks */ -private let maxFATSectorsCount: UInt32 = 109 +let maxFATSectorsCount: UInt32 = 109 extension FileHandle { func loadSector(_ header: Header, index: UInt32) throws -> DataReader { diff --git a/Sources/OLEKit/FileWrapper+Extension.swift b/Sources/OLEKit/FileWrapper+Extension.swift new file mode 100644 index 0000000..e3b60e4 --- /dev/null +++ b/Sources/OLEKit/FileWrapper+Extension.swift @@ -0,0 +1,205 @@ +import Foundation + +extension FileWrapper { + func loadSector(_ header: Header, index: UInt32) throws -> DataReader { + let sectorOffset = Int(header.sectorSize) * Int(index + 1) + + guard sectorOffset < header.fileSize + else { throw OLEError.invalidFATSector(byteOffset: UInt64(sectorOffset)) } + + let range = sectorOffset..<(sectorOffset + Int(header.sectorSize)) + return DataReader(regularFileContents![range]) + } + + func loadSectors( + _ header: Header, + indexStream: inout DataReader, + count: UInt32 + ) throws -> [UInt32] { + var result = [UInt32]() + result.reserveCapacity(Int(count)) + + for _ in 0.. [UInt32] { + var fat = [UInt32]() + + try fat.append(contentsOf: loadSectors( + header, + indexStream: &headerStream, + count: maxFATSectorsCount + )) + + // Since FAT is read from fixed-size sectors, it may contain more values + // than the actual number of sectors in the file. + // Keep only the relevant sector indexes: + if UInt64(fat.count) > header.sectorsCount { + fat = Array(fat.prefix(header.sectorsCount)) + } + + if header.diFATSectorsCount > 0 { + // There's a DIFAT because file is larger than 6.8MB. + // Some checks just in case: + + // There must be at least 109 blocks in header and the rest in + // DIFAT, so number of sectors must be >109. + guard header.fatSectorsCount > maxFATSectorsCount else { + throw OLEError.incorrectNumberOfFATSectors( + actual: header.fatSectorsCount, + expected: maxFATSectorsCount + ) + } + + guard header.firstDIFATSector < UInt(header.sectorsCount) else { + throw OLEError.sectorIndexInDIFATOOB( + actual: header.firstDIFATSector, + expected: header.sectorsCount + ) + } + + // We compute the necessary number of DIFAT sectors : + // Number of pointers per DIFAT sector = (sectorsize/4)-1 + // (-1 because the last pointer is the next DIFAT sector number) + let sectorPointersCount = UInt32(header.sectorSize / 4) - 1 + // (if 512 bytes: each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) + let inferredCount = + (header.fatSectorsCount - 109 + sectorPointersCount - 1) / sectorPointersCount + + guard header.diFATSectorsCount == inferredCount else { + throw OLEError.incorrectNumberOFDIFATSectors( + actual: header.diFATSectorsCount, + expected: inferredCount + ) + } + + var currentSectorID = header.firstDIFATSector + for _ in 0.. [UInt32] { + // MiniFAT is stored in a standard sub-stream, pointed to by a header + // field. + // NOTE: there are two sizes to take into account for this stream: + // 1) Stream size is calculated according to the number of sectors + // declared in the OLE header. This allocated stream may be more than + // needed to store the actual sector indexes. + // 2) Actually used size is calculated by dividing the MiniStream size + // (given by root entry size) by the size of mini sectors, *4 for + // 32 bits indexes: + + let streamSize = UInt64(header.miniFATSectorsCount) * UInt64(header.sectorSize) + let miniSectorsCount = (root.streamSize + UInt64(header.miniSectorSize) - 1) / + UInt64(header.miniSectorSize) + + let stream = try oleStream( + sectorID: header.firstMiniFATSector, + expectedStreamSize: streamSize, + firstSectorOffset: UInt64(header.sectorSize), + sectorSize: header.sectorSize, + fat: fat + ) + + var result = [UInt32]() + result.reserveCapacity(Int(miniSectorsCount)) + for _ in 0.. DataReader { + guard !(expectedStreamSize == 0 && sectorID == SectorID.endOfChain.rawValue) + else { throw OLEError.invalidEmptyStream } + + let sectorSize = UInt64(sectorSize) + let calculatedStreamSize = expectedStreamSize ?? UInt64(fat.count) * UInt64(sectorSize) + let numberOfSectors = (calculatedStreamSize + sectorSize - 1) / sectorSize + + // This number should (at least) be less than the total number of + // sectors in the given FAT: + guard numberOfSectors <= fat.count + else { throw OLEError.streamTooLarge(actual: numberOfSectors, expected: fat.count) } + + var currentSectorID = sectorID + var data = Data() + var offset = regularFileContents!.startIndex + for _ in 0..= 0 && UInt64(currentSectorID) < fat.count + else { throw OLEError.invalidOLEStreamSectorID(id: currentSectorID, total: fat.count) } + + offset = regularFileContents!.startIndex + Int(firstSectorOffset) + Int(sectorSize) * Int(currentSectorID) + + // if sector is the last of the file, sometimes it is not a + // complete sector (of 512 or 4K), so we may read less than + // sectorsize. + if currentSectorID == fat.count - 1 { + data.append(regularFileContents![offset.. calculatedStreamSize { + // `data` is truncated to the expected stream size + data = data.prefix(Int(calculatedStreamSize)) + } else if let expectedStreamSize = expectedStreamSize, data.count < expectedStreamSize { + // the stream size was not inferred, but was smaller than expected + throw OLEError.incompleteStream( + firstSectorID: sectorID, + actual: data.count, + expected: expectedStreamSize + ) + } + + return DataReader(data) + } +} diff --git a/Sources/OLEKit/Header.swift b/Sources/OLEKit/Header.swift index b871005..7900587 100644 --- a/Sources/OLEKit/Header.swift +++ b/Sources/OLEKit/Header.swift @@ -67,7 +67,7 @@ private let magic = Data([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]) SECT _sectFat[109]; // [4CH,436] the SECTs of first 109 FAT sectors }; */ -struct Header { +struct Header: Equatable { /// Size of the header, which is a sum of sizes of all fields from the spec. static let sizeInBytes = 76 diff --git a/Sources/OLEKit/OLEFile2.swift b/Sources/OLEKit/OLEFile2.swift new file mode 100644 index 0000000..4f2e4cd --- /dev/null +++ b/Sources/OLEKit/OLEFile2.swift @@ -0,0 +1,90 @@ +// Copyright 2020 CoreOffice contributors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +public final class OLEFile2 { + private var fileWrapper: FileWrapper + let header: Header + + /// File Allocation Table, also known as SAT – Sector Allocation Table + let fat: [UInt32] + + let miniFAT: [UInt32] + + // Can't be `lazy var` because Swift doesn't support throwing properties, and we need + // to handle (or rethrow) potential errors from `DataReader.init`. + private var miniStream: DataReader? + + public let root: DirectoryEntry + + public init(_ fileWrapper: FileWrapper) throws { + self.fileWrapper = fileWrapper + + guard let data = fileWrapper.regularFileContents + else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } + + var stream = DataReader(data[..<512]) + guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int + else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } + header = try Header(&stream, fileSize: fileSize, path: fileWrapper.filename ?? "") + + fat = try fileWrapper.loadFAT(headerStream: &stream, header) + + root = try DirectoryEntry.entries( + rootAt: header.firstDirectorySector, + in: fileWrapper, + header, + fat: fat + )[0] + + miniFAT = try fileWrapper.loadMiniFAT(header, root: root, fat: fat) + } + + /// Return an instance of `DataReader` that contains a given stream entry + public func stream(_ entry: DirectoryEntry) throws -> DataReader { + guard entry.type == .stream + else { throw OLEError.directoryEntryIsNotAStream(name: entry.name) } + + if entry.streamSize < header.miniStreamCutoffSize { + let miniStream = try self.miniStream ?? streamForceFAT(root) + + // cache miniStream + if self.miniStream == nil { + self.miniStream = miniStream + } + + return try miniStream.oleStream( + sectorID: entry.firstStreamSector, + expectedStreamSize: entry.streamSize, + firstSectorOffset: 0, + sectorSize: header.miniSectorSize, + fat: miniFAT + ) + } else { + return try streamForceFAT(entry) + } + } + + /// Always loads data according to FAT ignoring `miniStream` and `miniFAT` + func streamForceFAT(_ entry: DirectoryEntry) throws -> DataReader { + try fileWrapper.oleStream( + sectorID: entry.firstStreamSector, + expectedStreamSize: entry.streamSize, + firstSectorOffset: UInt64(header.sectorSize), + sectorSize: header.sectorSize, + fat: fat + ) + } +} diff --git a/Tests/OLEKitTests/OLEKitTests.swift b/Tests/OLEKitTests/OLEKitTests.swift index b279510..856eff5 100644 --- a/Tests/OLEKitTests/OLEKitTests.swift +++ b/Tests/OLEKitTests/OLEKitTests.swift @@ -128,4 +128,17 @@ final class OLEKitTests: XCTestCase { "FileHeader", ]) } + + func testOLEFile2() throws { + let url = URL(fileURLWithPath: #file) + .deletingLastPathComponent() + .appendingPathComponent("blank.hwp") + let wrapper = try FileWrapper(url: url, options: .immediate) + let ole = try OLEFile(url.path) + let ole2 = try OLEFile2(wrapper) + XCTAssertEqual(ole.header, ole2.header) + XCTAssertEqual(ole.fat, ole2.fat) + XCTAssertEqual(ole.miniFAT, ole2.miniFAT) + XCTAssertEqual(ole.root, ole2.root) + } } From 646b1af64603e7fafd43e5fd0ff156ba46983084 Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Sun, 17 Jan 2021 16:30:52 +0900 Subject: [PATCH 2/9] Check Linux --- Sources/OLEKit/DirectoryEntry.swift | 4 ++++ Sources/OLEKit/FileWrapper+Extension.swift | 4 ++++ Sources/OLEKit/OLEFile2.swift | 4 ++++ Tests/OLEKitTests/OLEKitTests.swift | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/Sources/OLEKit/DirectoryEntry.swift b/Sources/OLEKit/DirectoryEntry.swift index f43b954..7be10c4 100644 --- a/Sources/OLEKit/DirectoryEntry.swift +++ b/Sources/OLEKit/DirectoryEntry.swift @@ -171,6 +171,8 @@ public struct DirectoryEntry: Equatable { try Self.entries(index: 0, at: sectorID, in: fileHandle, header, fat: fat) } + #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) + private static func entries( index: UInt32, at sectorID: UInt32, @@ -200,4 +202,6 @@ public struct DirectoryEntry: Equatable { ) throws -> [DirectoryEntry] { try Self.entries(index: 0, at: sectorID, in: fileWrapper, header, fat: fat) } + + #endif } diff --git a/Sources/OLEKit/FileWrapper+Extension.swift b/Sources/OLEKit/FileWrapper+Extension.swift index e3b60e4..222a42c 100644 --- a/Sources/OLEKit/FileWrapper+Extension.swift +++ b/Sources/OLEKit/FileWrapper+Extension.swift @@ -1,5 +1,7 @@ import Foundation +#if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) + extension FileWrapper { func loadSector(_ header: Header, index: UInt32) throws -> DataReader { let sectorOffset = Int(header.sectorSize) * Int(index + 1) @@ -203,3 +205,5 @@ extension FileWrapper { return DataReader(data) } } + +#endif diff --git a/Sources/OLEKit/OLEFile2.swift b/Sources/OLEKit/OLEFile2.swift index 4f2e4cd..a69ea91 100644 --- a/Sources/OLEKit/OLEFile2.swift +++ b/Sources/OLEKit/OLEFile2.swift @@ -14,6 +14,8 @@ import Foundation +#if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) + public final class OLEFile2 { private var fileWrapper: FileWrapper let header: Header @@ -88,3 +90,5 @@ public final class OLEFile2 { ) } } + +#endif diff --git a/Tests/OLEKitTests/OLEKitTests.swift b/Tests/OLEKitTests/OLEKitTests.swift index 856eff5..8bd7f3f 100644 --- a/Tests/OLEKitTests/OLEKitTests.swift +++ b/Tests/OLEKitTests/OLEKitTests.swift @@ -129,6 +129,8 @@ final class OLEKitTests: XCTestCase { ]) } + #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) + func testOLEFile2() throws { let url = URL(fileURLWithPath: #file) .deletingLastPathComponent() @@ -141,4 +143,6 @@ final class OLEKitTests: XCTestCase { XCTAssertEqual(ole.miniFAT, ole2.miniFAT) XCTAssertEqual(ole.root, ole2.root) } + + #endif } From b159d6b6552e778b4f05c57f64a3879273bf326e Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Mon, 18 Jan 2021 02:22:52 +0900 Subject: [PATCH 3/9] Remove xcworkspace file --- .../xcode/package.xcworkspace/contents.xcworkspacedata | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata diff --git a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata deleted file mode 100644 index 919434a..0000000 --- a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata +++ /dev/null @@ -1,7 +0,0 @@ - - - - - From 6a68c1c380362b5f440ba7f00e69802f9b5539af Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Mon, 18 Jan 2021 03:08:12 +0900 Subject: [PATCH 4/9] Refactor duplicated codes --- Sources/OLEKit/DataStream.swift | 2 + Sources/OLEKit/FileWrapper+Extension.swift | 14 +++- Sources/OLEKit/OLEFile.swift | 33 +++++++- Sources/OLEKit/OLEFile2.swift | 94 ---------------------- Tests/OLEKitTests/OLEKitTests.swift | 14 ++-- 5 files changed, 52 insertions(+), 105 deletions(-) delete mode 100644 Sources/OLEKit/OLEFile2.swift diff --git a/Sources/OLEKit/DataStream.swift b/Sources/OLEKit/DataStream.swift index 4ea170a..944dfdb 100644 --- a/Sources/OLEKit/DataStream.swift +++ b/Sources/OLEKit/DataStream.swift @@ -30,6 +30,8 @@ public final class DataWriter { public final class DataReader: Reader { let data: Data + var regularFileContents: Data? { data } + /// Current byte offset within the stream. var byteOffset = 0 diff --git a/Sources/OLEKit/FileWrapper+Extension.swift b/Sources/OLEKit/FileWrapper+Extension.swift index 222a42c..4e34036 100644 --- a/Sources/OLEKit/FileWrapper+Extension.swift +++ b/Sources/OLEKit/FileWrapper+Extension.swift @@ -2,7 +2,19 @@ import Foundation #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) -extension FileWrapper { +extension FileWrapper: Reader { + func seek(toOffset: Int) { + seek(toOffset: toOffset) + } + + func readData(ofLength: Int) -> Data { + readData(ofLength: ofLength) + } + + func readDataToEnd() -> Data { + readDataToEnd() + } + func loadSector(_ header: Header, index: UInt32) throws -> DataReader { let sectorOffset = Int(header.sectorSize) * Int(index + 1) diff --git a/Sources/OLEKit/OLEFile.swift b/Sources/OLEKit/OLEFile.swift index 7b98539..d5d12b3 100644 --- a/Sources/OLEKit/OLEFile.swift +++ b/Sources/OLEKit/OLEFile.swift @@ -15,7 +15,7 @@ import Foundation public final class OLEFile { - private var fileHandle: FileHandle + private var reader: Reader let header: Header /// File Allocation Table, also known as SAT – Sector Allocation Table @@ -40,7 +40,7 @@ public final class OLEFile { guard let fileHandle = FileHandle(forReadingAtPath: path) else { throw OLEError.fileNotAvailableForReading(path: path) } - self.fileHandle = fileHandle + reader = fileHandle guard fileSize >= 512 else { throw OLEError.incompleteHeader } @@ -62,6 +62,33 @@ public final class OLEFile { miniFAT = try fileHandle.loadMiniFAT(header, root: root, fat: fat) } + #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) + + public init(_ fileWrapper: FileWrapper) throws { + reader = fileWrapper + + guard let data = fileWrapper.regularFileContents + else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } + + var stream = DataReader(data[..<512]) + guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int + else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } + header = try Header(&stream, fileSize: fileSize, path: fileWrapper.filename ?? "") + + fat = try fileWrapper.loadFAT(headerStream: &stream, header) + + root = try DirectoryEntry.entries( + rootAt: header.firstDirectorySector, + in: fileWrapper, + header, + fat: fat + )[0] + + miniFAT = try fileWrapper.loadMiniFAT(header, root: root, fat: fat) + } + + #endif + /// Return an instance of `DataReader` that contains a given stream entry public func stream(_ entry: DirectoryEntry) throws -> DataReader { guard entry.type == .stream @@ -89,7 +116,7 @@ public final class OLEFile { /// Always loads data according to FAT ignoring `miniStream` and `miniFAT` func streamForceFAT(_ entry: DirectoryEntry) throws -> DataReader { - try fileHandle.oleStream( + try reader.oleStream( sectorID: entry.firstStreamSector, expectedStreamSize: entry.streamSize, firstSectorOffset: UInt64(header.sectorSize), diff --git a/Sources/OLEKit/OLEFile2.swift b/Sources/OLEKit/OLEFile2.swift deleted file mode 100644 index a69ea91..0000000 --- a/Sources/OLEKit/OLEFile2.swift +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2020 CoreOffice contributors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -#if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - -public final class OLEFile2 { - private var fileWrapper: FileWrapper - let header: Header - - /// File Allocation Table, also known as SAT – Sector Allocation Table - let fat: [UInt32] - - let miniFAT: [UInt32] - - // Can't be `lazy var` because Swift doesn't support throwing properties, and we need - // to handle (or rethrow) potential errors from `DataReader.init`. - private var miniStream: DataReader? - - public let root: DirectoryEntry - - public init(_ fileWrapper: FileWrapper) throws { - self.fileWrapper = fileWrapper - - guard let data = fileWrapper.regularFileContents - else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } - - var stream = DataReader(data[..<512]) - guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int - else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } - header = try Header(&stream, fileSize: fileSize, path: fileWrapper.filename ?? "") - - fat = try fileWrapper.loadFAT(headerStream: &stream, header) - - root = try DirectoryEntry.entries( - rootAt: header.firstDirectorySector, - in: fileWrapper, - header, - fat: fat - )[0] - - miniFAT = try fileWrapper.loadMiniFAT(header, root: root, fat: fat) - } - - /// Return an instance of `DataReader` that contains a given stream entry - public func stream(_ entry: DirectoryEntry) throws -> DataReader { - guard entry.type == .stream - else { throw OLEError.directoryEntryIsNotAStream(name: entry.name) } - - if entry.streamSize < header.miniStreamCutoffSize { - let miniStream = try self.miniStream ?? streamForceFAT(root) - - // cache miniStream - if self.miniStream == nil { - self.miniStream = miniStream - } - - return try miniStream.oleStream( - sectorID: entry.firstStreamSector, - expectedStreamSize: entry.streamSize, - firstSectorOffset: 0, - sectorSize: header.miniSectorSize, - fat: miniFAT - ) - } else { - return try streamForceFAT(entry) - } - } - - /// Always loads data according to FAT ignoring `miniStream` and `miniFAT` - func streamForceFAT(_ entry: DirectoryEntry) throws -> DataReader { - try fileWrapper.oleStream( - sectorID: entry.firstStreamSector, - expectedStreamSize: entry.streamSize, - firstSectorOffset: UInt64(header.sectorSize), - sectorSize: header.sectorSize, - fat: fat - ) - } -} - -#endif diff --git a/Tests/OLEKitTests/OLEKitTests.swift b/Tests/OLEKitTests/OLEKitTests.swift index 8bd7f3f..d87efce 100644 --- a/Tests/OLEKitTests/OLEKitTests.swift +++ b/Tests/OLEKitTests/OLEKitTests.swift @@ -131,17 +131,17 @@ final class OLEKitTests: XCTestCase { #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - func testOLEFile2() throws { + func testOLEFileFromFileWrapper() throws { let url = URL(fileURLWithPath: #file) .deletingLastPathComponent() .appendingPathComponent("blank.hwp") let wrapper = try FileWrapper(url: url, options: .immediate) - let ole = try OLEFile(url.path) - let ole2 = try OLEFile2(wrapper) - XCTAssertEqual(ole.header, ole2.header) - XCTAssertEqual(ole.fat, ole2.fat) - XCTAssertEqual(ole.miniFAT, ole2.miniFAT) - XCTAssertEqual(ole.root, ole2.root) + let oleFromPath = try OLEFile(url.path) + let oleFromWrapper = try OLEFile(wrapper) + XCTAssertEqual(oleFromPath.header, oleFromWrapper.header) + XCTAssertEqual(oleFromPath.fat, oleFromWrapper.fat) + XCTAssertEqual(oleFromPath.miniFAT, oleFromWrapper.miniFAT) + XCTAssertEqual(oleFromPath.root, oleFromWrapper.root) } #endif From f683820258a44ed8a51ebcd9fe06986bc67ab55d Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Mon, 18 Jan 2021 03:13:10 +0900 Subject: [PATCH 5/9] Change offset to data.startIndex --- Sources/OLEKit/DataStream.swift | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Sources/OLEKit/DataStream.swift b/Sources/OLEKit/DataStream.swift index 944dfdb..49092c8 100644 --- a/Sources/OLEKit/DataStream.swift +++ b/Sources/OLEKit/DataStream.swift @@ -33,10 +33,11 @@ public final class DataReader: Reader { var regularFileContents: Data? { data } /// Current byte offset within the stream. - var byteOffset = 0 + var byteOffset: Int init(_ data: Data) { self.data = data + byteOffset = data.startIndex } public var totalBytes: Int { @@ -44,46 +45,46 @@ public final class DataReader: Reader { } public func seek(toOffset offset: Int) { - precondition(offset < data.count) + precondition(offset < data.endIndex) byteOffset = offset } /// Read a single byte from the stream and increment `byteOffset` by 1. public func read() -> UInt8 { - precondition(byteOffset + 1 <= data.count) + precondition(byteOffset + 1 <= data.endIndex) defer { byteOffset += 1 } - return data[data.startIndex + byteOffset] + return data[byteOffset] } /// Read two bytes in little-endian order as a single `UInt16` value and /// increment `byteOffset` by 2. public func read() -> UInt16 { - precondition(byteOffset + 2 <= data.count) + precondition(byteOffset + 2 <= data.endIndex) defer { byteOffset += 2 } - return (UInt16(data[data.startIndex + byteOffset + 1]) << 8) + UInt16(data[data.startIndex + byteOffset]) + return (UInt16(data[byteOffset + 1]) << 8) + UInt16(data[byteOffset]) } /// Read four bytes in little-endian order as a single `UInt32` value and /// increment `byteOffset` by 4. public func read() -> UInt32 { - precondition(byteOffset + 4 <= data.count) + precondition(byteOffset + 4 <= data.endIndex) defer { byteOffset += 4 } - return (UInt32(data[data.startIndex + byteOffset + 3]) << 24) - + (UInt32(data[data.startIndex + byteOffset + 2]) << 16) - + (UInt32(data[data.startIndex + byteOffset + 1]) << 8) - + UInt32(data[data.startIndex + byteOffset]) + return (UInt32(data[byteOffset + 3]) << 24) + + (UInt32(data[byteOffset + 2]) << 16) + + (UInt32(data[byteOffset + 1]) << 8) + + UInt32(data[byteOffset]) } /// Read a given `count` of bytes as raw data and increment `byteOffset` by `count`. public func readData(ofLength length: Int) -> Data { - precondition(byteOffset + length <= data.count) + precondition(byteOffset + length <= data.endIndex) defer { byteOffset += length } - return data[data.startIndex + byteOffset.. Data { @@ -91,6 +92,6 @@ public final class DataReader: Reader { defer { byteOffset = data.count - 1 } - return data[data.startIndex + byteOffset.. Date: Mon, 18 Jan 2021 21:52:39 +0900 Subject: [PATCH 6/9] Integrate all functions to extension Reader --- Sources/OLEKit/DataStream.swift | 2 - Sources/OLEKit/DirectoryEntry.swift | 42 +--- Sources/OLEKit/FAT.swift | 4 +- Sources/OLEKit/FileWrapper+Extension.swift | 221 --------------------- Sources/OLEKit/OLEFile.swift | 51 ++--- Sources/OLEKit/Reader.swift | 7 +- 6 files changed, 29 insertions(+), 298 deletions(-) delete mode 100644 Sources/OLEKit/FileWrapper+Extension.swift diff --git a/Sources/OLEKit/DataStream.swift b/Sources/OLEKit/DataStream.swift index 49092c8..946feeb 100644 --- a/Sources/OLEKit/DataStream.swift +++ b/Sources/OLEKit/DataStream.swift @@ -30,8 +30,6 @@ public final class DataWriter { public final class DataReader: Reader { let data: Data - var regularFileContents: Data? { data } - /// Current byte offset within the stream. var byteOffset: Int diff --git a/Sources/OLEKit/DirectoryEntry.swift b/Sources/OLEKit/DirectoryEntry.swift index 7be10c4..7673550 100644 --- a/Sources/OLEKit/DirectoryEntry.swift +++ b/Sources/OLEKit/DirectoryEntry.swift @@ -144,11 +144,11 @@ public struct DirectoryEntry: Equatable { private static func entries( index: UInt32, at sectorID: UInt32, - in fileHandle: FileHandle, + in reader: Reader, _ header: Header, fat: [UInt32] ) throws -> [DirectoryEntry] { - var stream = try fileHandle.oleStream( + var stream = try reader.oleStream( sectorID: sectorID, firstSectorOffset: UInt64(header.sectorSize), sectorSize: header.sectorSize, @@ -164,44 +164,10 @@ public struct DirectoryEntry: Equatable { static func entries( rootAt sectorID: UInt32, - in fileHandle: FileHandle, + in reader: Reader, _ header: Header, fat: [UInt32] ) throws -> [DirectoryEntry] { - try Self.entries(index: 0, at: sectorID, in: fileHandle, header, fat: fat) + try Self.entries(index: 0, at: sectorID, in: reader, header, fat: fat) } - - #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - - private static func entries( - index: UInt32, - at sectorID: UInt32, - in fileWrapper: FileWrapper, - _ header: Header, - fat: [UInt32] - ) throws -> [DirectoryEntry] { - var stream = try fileWrapper.oleStream( - sectorID: sectorID, - firstSectorOffset: UInt64(header.sectorSize), - sectorSize: header.sectorSize, - fat: fat - ) - var peers = [DirectoryEntry]() - - if let entry = try DirectoryEntry(&stream, &peers, index: index, sectorSize: header.sectorSize) { - peers.append(entry) - } - return peers - } - - static func entries( - rootAt sectorID: UInt32, - in fileWrapper: FileWrapper, - _ header: Header, - fat: [UInt32] - ) throws -> [DirectoryEntry] { - try Self.entries(index: 0, at: sectorID, in: fileWrapper, header, fat: fat) - } - - #endif } diff --git a/Sources/OLEKit/FAT.swift b/Sources/OLEKit/FAT.swift index 77882a2..9a1758e 100644 --- a/Sources/OLEKit/FAT.swift +++ b/Sources/OLEKit/FAT.swift @@ -38,14 +38,14 @@ enum SectorID: UInt32 { Additional sectors are described by DIFAT blocks */ let maxFATSectorsCount: UInt32 = 109 -extension FileHandle { +extension Reader { func loadSector(_ header: Header, index: UInt32) throws -> DataReader { let sectorOffset = UInt64(header.sectorSize) * UInt64(index + 1) guard sectorOffset < header.fileSize else { throw OLEError.invalidFATSector(byteOffset: sectorOffset) } - seek(toFileOffset: sectorOffset) + seek(toOffset: Int(sectorOffset)) return DataReader(readData(ofLength: Int(header.sectorSize))) } diff --git a/Sources/OLEKit/FileWrapper+Extension.swift b/Sources/OLEKit/FileWrapper+Extension.swift deleted file mode 100644 index 4e34036..0000000 --- a/Sources/OLEKit/FileWrapper+Extension.swift +++ /dev/null @@ -1,221 +0,0 @@ -import Foundation - -#if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - -extension FileWrapper: Reader { - func seek(toOffset: Int) { - seek(toOffset: toOffset) - } - - func readData(ofLength: Int) -> Data { - readData(ofLength: ofLength) - } - - func readDataToEnd() -> Data { - readDataToEnd() - } - - func loadSector(_ header: Header, index: UInt32) throws -> DataReader { - let sectorOffset = Int(header.sectorSize) * Int(index + 1) - - guard sectorOffset < header.fileSize - else { throw OLEError.invalidFATSector(byteOffset: UInt64(sectorOffset)) } - - let range = sectorOffset..<(sectorOffset + Int(header.sectorSize)) - return DataReader(regularFileContents![range]) - } - - func loadSectors( - _ header: Header, - indexStream: inout DataReader, - count: UInt32 - ) throws -> [UInt32] { - var result = [UInt32]() - result.reserveCapacity(Int(count)) - - for _ in 0.. [UInt32] { - var fat = [UInt32]() - - try fat.append(contentsOf: loadSectors( - header, - indexStream: &headerStream, - count: maxFATSectorsCount - )) - - // Since FAT is read from fixed-size sectors, it may contain more values - // than the actual number of sectors in the file. - // Keep only the relevant sector indexes: - if UInt64(fat.count) > header.sectorsCount { - fat = Array(fat.prefix(header.sectorsCount)) - } - - if header.diFATSectorsCount > 0 { - // There's a DIFAT because file is larger than 6.8MB. - // Some checks just in case: - - // There must be at least 109 blocks in header and the rest in - // DIFAT, so number of sectors must be >109. - guard header.fatSectorsCount > maxFATSectorsCount else { - throw OLEError.incorrectNumberOfFATSectors( - actual: header.fatSectorsCount, - expected: maxFATSectorsCount - ) - } - - guard header.firstDIFATSector < UInt(header.sectorsCount) else { - throw OLEError.sectorIndexInDIFATOOB( - actual: header.firstDIFATSector, - expected: header.sectorsCount - ) - } - - // We compute the necessary number of DIFAT sectors : - // Number of pointers per DIFAT sector = (sectorsize/4)-1 - // (-1 because the last pointer is the next DIFAT sector number) - let sectorPointersCount = UInt32(header.sectorSize / 4) - 1 - // (if 512 bytes: each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) - let inferredCount = - (header.fatSectorsCount - 109 + sectorPointersCount - 1) / sectorPointersCount - - guard header.diFATSectorsCount == inferredCount else { - throw OLEError.incorrectNumberOFDIFATSectors( - actual: header.diFATSectorsCount, - expected: inferredCount - ) - } - - var currentSectorID = header.firstDIFATSector - for _ in 0.. [UInt32] { - // MiniFAT is stored in a standard sub-stream, pointed to by a header - // field. - // NOTE: there are two sizes to take into account for this stream: - // 1) Stream size is calculated according to the number of sectors - // declared in the OLE header. This allocated stream may be more than - // needed to store the actual sector indexes. - // 2) Actually used size is calculated by dividing the MiniStream size - // (given by root entry size) by the size of mini sectors, *4 for - // 32 bits indexes: - - let streamSize = UInt64(header.miniFATSectorsCount) * UInt64(header.sectorSize) - let miniSectorsCount = (root.streamSize + UInt64(header.miniSectorSize) - 1) / - UInt64(header.miniSectorSize) - - let stream = try oleStream( - sectorID: header.firstMiniFATSector, - expectedStreamSize: streamSize, - firstSectorOffset: UInt64(header.sectorSize), - sectorSize: header.sectorSize, - fat: fat - ) - - var result = [UInt32]() - result.reserveCapacity(Int(miniSectorsCount)) - for _ in 0.. DataReader { - guard !(expectedStreamSize == 0 && sectorID == SectorID.endOfChain.rawValue) - else { throw OLEError.invalidEmptyStream } - - let sectorSize = UInt64(sectorSize) - let calculatedStreamSize = expectedStreamSize ?? UInt64(fat.count) * UInt64(sectorSize) - let numberOfSectors = (calculatedStreamSize + sectorSize - 1) / sectorSize - - // This number should (at least) be less than the total number of - // sectors in the given FAT: - guard numberOfSectors <= fat.count - else { throw OLEError.streamTooLarge(actual: numberOfSectors, expected: fat.count) } - - var currentSectorID = sectorID - var data = Data() - var offset = regularFileContents!.startIndex - for _ in 0..= 0 && UInt64(currentSectorID) < fat.count - else { throw OLEError.invalidOLEStreamSectorID(id: currentSectorID, total: fat.count) } - - offset = regularFileContents!.startIndex + Int(firstSectorOffset) + Int(sectorSize) * Int(currentSectorID) - - // if sector is the last of the file, sometimes it is not a - // complete sector (of 512 or 4K), so we may read less than - // sectorsize. - if currentSectorID == fat.count - 1 { - data.append(regularFileContents![offset.. calculatedStreamSize { - // `data` is truncated to the expected stream size - data = data.prefix(Int(calculatedStreamSize)) - } else if let expectedStreamSize = expectedStreamSize, data.count < expectedStreamSize { - // the stream size was not inferred, but was smaller than expected - throw OLEError.incompleteStream( - firstSectorID: sectorID, - actual: data.count, - expected: expectedStreamSize - ) - } - - return DataReader(data) - } -} - -#endif diff --git a/Sources/OLEKit/OLEFile.swift b/Sources/OLEKit/OLEFile.swift index d5d12b3..f94203f 100644 --- a/Sources/OLEKit/OLEFile.swift +++ b/Sources/OLEKit/OLEFile.swift @@ -29,7 +29,7 @@ public final class OLEFile { public let root: DirectoryEntry - public init(_ path: String) throws { + public convenience init(_ path: String) throws { guard FileManager.default.fileExists(atPath: path) else { throw OLEError.fileDoesNotExist(path) } @@ -40,55 +40,48 @@ public final class OLEFile { guard let fileHandle = FileHandle(forReadingAtPath: path) else { throw OLEError.fileNotAvailableForReading(path: path) } - reader = fileHandle + let allData = fileHandle.readDataToEndOfFile() + fileHandle.seek(toFileOffset: UInt64(0)) - guard fileSize >= 512 - else { throw OLEError.incompleteHeader } + try self.init(data: allData, fileSize: fileSize, path: path) + } - let data = fileHandle.readData(ofLength: 512) + #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - var stream = DataReader(data) - header = try Header(&stream, fileSize: fileSize, path: path) + public convenience init(_ fileWrapper: FileWrapper) throws { + let fileName = fileWrapper.filename ?? "" - fat = try fileHandle.loadFAT(headerStream: &stream, header) + guard let data = fileWrapper.regularFileContents + else { throw OLEError.fileDoesNotExist(fileName) } - root = try DirectoryEntry.entries( - rootAt: header.firstDirectorySector, - in: fileHandle, - header, - fat: fat - )[0] + guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int + else { throw OLEError.fileDoesNotExist(fileName) } - miniFAT = try fileHandle.loadMiniFAT(header, root: root, fat: fat) + try self.init(data: data, fileSize: fileSize, path: fileName) } - #if os(iOS) || os(watchOS) || os(tvOS) || os(macOS) - - public init(_ fileWrapper: FileWrapper) throws { - reader = fileWrapper + #endif - guard let data = fileWrapper.regularFileContents - else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } + private init(data: Data, fileSize: Int, path: String) throws { + guard fileSize >= 512 + else { throw OLEError.incompleteHeader } + reader = DataReader(data) var stream = DataReader(data[..<512]) - guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int - else { throw OLEError.fileDoesNotExist(fileWrapper.filename ?? "") } - header = try Header(&stream, fileSize: fileSize, path: fileWrapper.filename ?? "") + header = try Header(&stream, fileSize: fileSize, path: path) - fat = try fileWrapper.loadFAT(headerStream: &stream, header) + fat = try reader.loadFAT(headerStream: &stream, header) root = try DirectoryEntry.entries( rootAt: header.firstDirectorySector, - in: fileWrapper, + in: reader, header, fat: fat )[0] - miniFAT = try fileWrapper.loadMiniFAT(header, root: root, fat: fat) + miniFAT = try reader.loadMiniFAT(header, root: root, fat: fat) } - #endif - /// Return an instance of `DataReader` that contains a given stream entry public func stream(_ entry: DirectoryEntry) throws -> DataReader { guard entry.type == .stream diff --git a/Sources/OLEKit/Reader.swift b/Sources/OLEKit/Reader.swift index 1d8364e..96f2ab0 100644 --- a/Sources/OLEKit/Reader.swift +++ b/Sources/OLEKit/Reader.swift @@ -1,13 +1,8 @@ import Foundation -/// Helper protocol that presents a unified interface for both `FileHandle` and `DataReader`. +/// Helper protocol that presents a unified interface for `FileHandle`, `FileWrapper` and `DataReader`. protocol Reader: AnyObject { func seek(toOffset: Int) func readData(ofLength: Int) -> Data func readDataToEnd() -> Data } - -extension FileHandle: Reader { - func seek(toOffset offset: Int) { seek(toFileOffset: UInt64(offset)) } - func readDataToEnd() -> Data { readDataToEndOfFile() } -} From 59d03e69ba9895738af53882b8ff6b407d97fb15 Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Mon, 18 Jan 2021 23:33:48 +0900 Subject: [PATCH 7/9] Group same throw to single condition Co-authored-by: Max Desiatov --- Sources/OLEKit/OLEFile.swift | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Sources/OLEKit/OLEFile.swift b/Sources/OLEKit/OLEFile.swift index f94203f..f14cd0e 100644 --- a/Sources/OLEKit/OLEFile.swift +++ b/Sources/OLEKit/OLEFile.swift @@ -51,10 +51,9 @@ public final class OLEFile { public convenience init(_ fileWrapper: FileWrapper) throws { let fileName = fileWrapper.filename ?? "" - guard let data = fileWrapper.regularFileContents - else { throw OLEError.fileDoesNotExist(fileName) } - - guard let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int + guard + let data = fileWrapper.regularFileContents, + let fileSize = fileWrapper.fileAttributes[FileAttributeKey.size.rawValue] as? Int else { throw OLEError.fileDoesNotExist(fileName) } try self.init(data: data, fileSize: fileSize, path: fileName) From f8de875f095719e410aa0fb66d2136b1adb3c643 Mon Sep 17 00:00:00 2001 From: Seungbin Oh Date: Mon, 18 Jan 2021 23:34:59 +0900 Subject: [PATCH 8/9] Change internal to fileprivate --- Sources/OLEKit/FAT.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/OLEKit/FAT.swift b/Sources/OLEKit/FAT.swift index 9a1758e..81a7b9a 100644 --- a/Sources/OLEKit/FAT.swift +++ b/Sources/OLEKit/FAT.swift @@ -36,7 +36,7 @@ enum SectorID: UInt32 { FAT sectors, right after the header which is 76 bytes long. (always 109, whatever the sector size: 512 bytes = 76+4*109) Additional sectors are described by DIFAT blocks */ -let maxFATSectorsCount: UInt32 = 109 +private let maxFATSectorsCount: UInt32 = 109 extension Reader { func loadSector(_ header: Header, index: UInt32) throws -> DataReader { From 6655194be3598efe72b45bbd823980d1e971134a Mon Sep 17 00:00:00 2001 From: Max Desiatov Date: Mon, 18 Jan 2021 14:59:02 +0000 Subject: [PATCH 9/9] Make `noStream` private --- Sources/OLEKit/DirectoryEntry.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/OLEKit/DirectoryEntry.swift b/Sources/OLEKit/DirectoryEntry.swift index 7673550..29c83e9 100644 --- a/Sources/OLEKit/DirectoryEntry.swift +++ b/Sources/OLEKit/DirectoryEntry.swift @@ -14,7 +14,7 @@ import Foundation -let noStream: UInt32 = 0xFFFF_FFFF +private let noStream: UInt32 = 0xFFFF_FFFF /* struct to parse directory entries: '<64sHBBIII16sIQQIII'