Skip to content

Commit

Permalink
Chunker: fix wrong EOF assumption[1], check for return type[2]
Browse files Browse the repository at this point in the history
[1]
This worked incidentally because OSes tend to return at least one page
worth of data when EOF is not reached. Increasing WINDOW_SIZE beyond
the page size might have led to data loss.

[2]
If read() of the passed Python object returned something not-bytes,
PyBytes_Size returns -1 (ssize_t) which becomes a very large number for
memcpy()'s size_t.
  • Loading branch information
enkore committed Mar 27, 2016
1 parent db171e9 commit 61e2f12
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
6 changes: 5 additions & 1 deletion borg/_chunker.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ chunker_fill(Chunker *c)
return 0;
}
n = PyBytes_Size(data);
if(PyErr_Occurred()) {
// we wanted bytes(), but got something else
return 0;
}
if(n) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
c->remaining += n;
Expand Down Expand Up @@ -205,7 +209,7 @@ chunker_process(Chunker *c)
return NULL;
}
}
if(c->remaining < window_size) {
if(c->eof) {
c->done = 1;
if(c->remaining) {
c->bytes_yielded += c->remaining;
Expand Down
13 changes: 12 additions & 1 deletion borg/testsuite/chunker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from io import BytesIO

from ..chunker import Chunker, buzhash, buzhash_update
from ..archive import CHUNK_MAX_EXP
from ..archive import CHUNK_MAX_EXP, CHUNKER_PARAMS
from . import BaseTestCase


Expand Down Expand Up @@ -29,3 +29,14 @@ def test_buzhash(self):
self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
# Test with more than 31 bytes to make sure our barrel_shift macro works correctly
self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248)

def test_small_reads(self):
# Exercises the chunker with a file-like object whose read() hands back
# a single byte per call, regardless of how many bytes were requested.
class SmallReadFile:
# Backing buffer; one trailing byte is discarded on every read() call.
input = b'a' * (20 + 1)

def read(self, nbytes):
# nbytes is ignored: drop the last byte, then return at most one byte.
# Starting from 21 bytes this yields 20 one-byte reads, then b''.
self.input = self.input[:-1]
return self.input[:1]

# Joining every chunk must reproduce exactly the 20 bytes that were served.
reconstructed = b''.join(Chunker(0, *CHUNKER_PARAMS).chunkify(SmallReadFile()))
assert reconstructed == b'a' * 20

0 comments on commit 61e2f12

Please sign in to comment.