diff --git a/borg/archive.py b/borg/archive.py index bb4cdbe175b..55069102cfc 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -16,10 +16,12 @@ import time from io import BytesIO from . import xattr +from .compress import Compressor, COMPR_BUFFER from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \ parse_timestamp, to_localtime, format_time, format_timedelta, \ Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \ - ProgressIndicatorPercent + ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \ + PathPrefixPattern, FnmatchPattern, open_item from .platform import acl_get, acl_set from .chunker import Chunker from .hashindex import ChunkIndex @@ -231,7 +233,7 @@ def add_item(self, item): if self.show_progress: self.stats.show_progress(item=item, dt=0.2) self.items_buffer.add(item) - if time.time() - self.last_checkpoint > self.checkpoint_interval: + if self.checkpoint_interval and time.time() - self.last_checkpoint > self.checkpoint_interval: self.write_checkpoint() self.last_checkpoint = time.time() @@ -240,7 +242,7 @@ def write_checkpoint(self): del self.manifest.archives[self.checkpoint_name] self.cache.chunk_decref(self.id, self.stats) - def save(self, name=None, timestamp=None): + def save(self, name=None, timestamp=None, additional_metadata=None): name = name or self.name if name in self.manifest.archives: raise self.AlreadyExists(name) @@ -253,7 +255,7 @@ def save(self, name=None, timestamp=None): self.end = timestamp start = timestamp end = timestamp # we only have 1 value - metadata = StableDict({ + metadata = { 'version': 1, 'name': name, 'items': self.items_buffer.chunks, @@ -263,8 +265,9 @@ def save(self, name=None, timestamp=None): 'time': start.isoformat(), 'time_end': end.isoformat(), 'chunker_params': self.chunker_params, - }) - data = msgpack.packb(metadata, unicode_errors='surrogateescape') + } + metadata.update(additional_metadata or {}) + data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, data, self.stats) self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} @@ -458,6 +461,8 @@ def rename(self, name): self.manifest.archives[name] = {'id': new_id, 'time': metadata[b'time']} self.cache.chunk_decref(self.id, self.stats) del self.manifest.archives[self.name] + self.name = name + self.id = new_id def delete(self, stats, progress=False): unpacker = msgpack.Unpacker(use_list=False) @@ -569,7 +574,7 @@ def process_file(self, path, st, cache, ignore_inode=False): if ids is not None: # Make sure all ids are available for id_ in ids: - if not cache.seen_chunk(id_): + if not cache.get_chunk(id_)[0]: break else: chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] @@ -917,3 +922,289 @@ def finish(self, save_space=False): if self.repair: self.manifest.write() self.repository.commit(save_space=save_space) + + +class ArchiveRewriter: + class FakeTargetArchive: + def __init__(self): + self.stats = Statistics() + + class Interrupted(Exception): + def __init__(self, metadata=None): + self.metadata = metadata or {} + + @staticmethod + def is_temporary_archive(archive_name): + return archive_name.endswith('.rewrite') or archive_name.endswith('.rewrite.temp') + + def __init__(self, repository, manifest, key, cache, matcher, + exclude_caches=False, exclude_if_present=None, keep_tag_files=False, + chunker_params=None, compression=None, force_recompress=False, + 
dry_run=False, stats=False, progress=False, list=False): + self.repository = repository + self.key = key + self.manifest = manifest + self.cache = cache + + self.matcher = matcher + self.exclude_caches = exclude_caches + self.exclude_if_present = exclude_if_present or [] + self.keep_tag_files = keep_tag_files + + self.chunker_params = chunker_params or CHUNKER_PARAMS + self.compression = compression or dict(name='none') + self.force_recompress = force_recompress + self.seen_chunks = set() + self.recompress = bool(compression) or force_recompress + self.rechunkify = bool(chunker_params) + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(self.compression) + key.compressor = Compressor(**compr_args) + + self.dry_run = dry_run + self.stats = stats + self.progress = progress + self.list = list + + self.interrupt = False + self.errors = False + + def rewrite(self, archive_name): + assert not self.is_temporary_archive(archive_name) + archive = self.open_archive(archive_name) + target, resume_from = self.create_target_or_resume(archive) + + if self.exclude_if_present or self.exclude_caches: + self.matcher_add_tagged_dirs(archive) + try: + self.process_items(archive, target, resume_from) + except self.Interrupted as e: + return self.save(archive, target, completed=False, metadata=e.metadata) + return self.save(archive, target) + + def process_items(self, archive, target, resume_from=None): + matcher = self.matcher + target_is_subset = not matcher.empty() + hardlink_masters = {} if target_is_subset else None + + def item_is_hardlink_master(item): + return (target_is_subset and + stat.S_ISREG(item[b'mode']) and + item.get(b'hardlink_master', True) and + b'source' not in item and + not matcher.match(item[b'path'])) + + for item in archive.iter_items( + filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])): + if item_is_hardlink_master(item): + # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters + hardlink_masters[item[b'path']] = (item.get(b'chunks'), None) + continue + if resume_from: + # Fast forward to after the last processed file + if item[b'path'] == resume_from: + logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path'])) + resume_from = None + continue + if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters: + # master of this hard link is outside the target subset + chunks, new_source = hardlink_masters[item[b'source']] + if new_source is None: + # First item to use this master, move the chunks + item[b'chunks'] = chunks + hardlink_masters[item[b'source']] = (None, item[b'path']) + del item[b'source'] + else: + # Master was already moved, only update this item's source + item[b'source'] = new_source + if not self.dry_run: + try: + self.process_item(archive, target, item) + except self.Interrupted: + if self.progress: + target.stats.show_progress(final=True) + raise + assert resume_from is None, 'Reached end of source archive while fast-forwarding.' 
+ if self.progress: + target.stats.show_progress(final=True) + + def process_item(self, archive, target, item): + if b'chunks' in item: + self.process_chunks(archive, target, item) + target.stats.nfiles += 1 + target.add_item(item) + if self.list: + logger.info('I %s', remove_surrogates(item[b'path'])) + if self.interrupt: + raise self.Interrupted + + def process_chunks(self, archive, target, item): + if not self.recompress and not self.rechunkify: + for chunk in item[b'chunks']: + self.cache.chunk_incref(chunk[0], target.stats) + return + chunk_iterator = archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]) + if self.rechunkify: + file = ChunkIteratorFileWrapper(chunk_iterator) + chunk_iterator = target.chunker.chunkify(file) + new_chunks = [] + if target.rewrite_partial_chunks: + # No incref, create_target_or_resume already did that + new_chunks = target.rewrite_partial_chunks + target.rewrite_partial_chunks = None + for chunk in new_chunks: + if chunk[1] > 4096: + self.seen_chunks.add(chunk[0]) + refcount, _, _ = self.cache.get_chunk(chunk[0]) + # not rechunkify-ing: this chunk will *become* unique for refcount == 2 (this & source) + unique = (self.rechunkify and refcount == 1) or (not self.rechunkify and refcount == 2) + target.stats.update(chunk[1], chunk[2], unique) + next(chunk_iterator) + logger.debug('Copied %d chunks from a partially processed item', len(new_chunks)) + for chunk in chunk_iterator: + # TODO: detect / skip / force like in recompress (cherry pick compr API changes from there) + # TODO: (after feature-hmac is merged, or rejected) + chunk_id = self.key.id_hash(chunk) + large_chunk = len(chunk) > 4096 + if large_chunk and chunk_id in self.seen_chunks: + new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats)) + else: + new_chunks.append(self.cache.add_chunk(chunk_id, chunk, target.stats, check_csize=self.recompress)) + if large_chunk: + self.seen_chunks.add(chunk_id) + if self.progress: + target.stats.show_progress(item=item, dt=0.2) + if self.interrupt: + raise self.Interrupted({ + 'rewrite_partial_chunks': new_chunks, + }) + if self.rechunkify: + assert file.exhausted + item[b'chunks'] = new_chunks + + def save(self, archive, target, completed=True, metadata=None): + if self.dry_run: + return completed + if completed: + timestamp = archive.ts.replace(tzinfo=None) + target.save(timestamp=timestamp, additional_metadata={ + 'cmdline': archive.metadata[b'cmdline'], + 'rewrite_cmdline': sys.argv, + }) + archive.delete(Statistics(), progress=self.progress) + target.rename(archive.name) + if self.stats: + target.end = datetime.utcnow() + log_multi(DASHES, + str(target), + DASHES, + str(target.stats), + str(self.cache), + DASHES) + else: + additional_metadata = metadata or {} + additional_metadata.update({ + 'rewrite_source_id': archive.id, + 'rewrite_args': sys.argv[1:], + }) + target.save(name=archive.name + '.rewrite', additional_metadata=additional_metadata) + logger.info('Run the same command again to resume.') + return completed + + def matcher_add_tagged_dirs(self, archive): + def exclude(dir, tag_item): + if self.keep_tag_files: + tag_files.append(PathPrefixPattern(tag_item[b'path'])) + tagged_dirs.append(FnmatchPattern(dir + '/')) + else: + tagged_dirs.append(PathPrefixPattern(dir)) + + matcher = self.matcher + tag_files = [] + tagged_dirs = [] + # build hardlink masters, but only for paths ending in CACHEDIR.TAG, so we can read hard-linked CACHEDIR.TAGs + cachedir_masters = {} + + for item in archive.iter_items( + filter=lambda item: 
item[b'path'].endswith('CACHEDIR.TAG') or matcher.match(item[b'path'])): + if item[b'path'].endswith('CACHEDIR.TAG'): + cachedir_masters[item[b'path']] = item + if stat.S_ISREG(item[b'mode']): + dir, tag_file = os.path.split(item[b'path']) + logger.debug(tag_file) + if tag_file in self.exclude_if_present: + exclude(dir, item) + if self.exclude_caches and tag_file == 'CACHEDIR.TAG': + tag_contents = b'Signature: 8a477f597d28d172789f06886806bc55' + if b'chunks' in item: + file = open_item(archive, item) + else: + file = open_item(archive, cachedir_masters[item[b'source']]) + if file.read(len(tag_contents)).startswith(tag_contents): + exclude(dir, item) + matcher.add(tag_files, True) + matcher.add(tagged_dirs, False) + + def create_target_or_resume(self, archive): + if self.dry_run: + return self.FakeTargetArchive(), None + target_name = archive.name + '.rewrite' + resume = target_name in self.manifest.archives + if resume: + target, resume_from = self.try_resume(archive, target_name) + if target: + return target, resume_from + return self.create_target_archive(target_name), None + + def try_resume(self, archive, target_name): + logger.info('Found %s, will resume interrupted operation', target_name) + old_target = self.open_archive(target_name) + resume_id = old_target.metadata[b'rewrite_source_id'] + resume_args = [arg.decode('utf-8', 'surrogateescape') for arg in old_target.metadata[b'rewrite_args']] + if resume_id != archive.id: + logger.warning('Source archive changed, will discard %s and start over', target_name) + logger.warning('Saved fingerprint: %s', hexlify(resume_id).decode('ascii')) + logger.warning('Current fingerprint: %s', archive.fpr) + old_target.delete(Statistics(), progress=self.progress) + return None, None # can't resume + if resume_args != sys.argv[1:]: + logger.warning('Command line changed, this might lead to inconsistencies') + logger.warning('Saved: %s', repr(resume_args)) + logger.warning('Current: %s', repr(sys.argv[1:])) + target_name += '.temp' + if target_name in self.manifest.archives: + logger.warning('Found temporary replay-archive from earlier resume, deleting') + must_delete = self.open_archive(target_name) + must_delete.delete(Statistics(), progress=self.progress) + target = self.create_target_archive(target_name) + logger.info('Replaying items from interrupted operation...') + item = None + for item in old_target.iter_items(): + if b'chunks' in item: + for chunk in item[b'chunks']: + self.cache.chunk_incref(chunk[0], target.stats) + target.stats.nfiles += 1 + target.add_item(item) + if item: + resume_from = item[b'path'] + else: + resume_from = None + if self.progress: + old_target.stats.show_progress(final=True) + target.rewrite_partial_chunks = old_target.metadata.get(b'rewrite_partial_chunks', []) + for chunk_id, _, _ in target.rewrite_partial_chunks: + # incref now, otherwise old_target.delete() might delete these chunks + self.cache.chunk_incref(chunk_id, target.stats) + old_target.delete(Statistics(), progress=self.progress) + logger.info('Done replaying items') + return target, resume_from + + def create_target_archive(self, name): + archive = Archive(self.repository, self.key, self.manifest, name, create=True, + progress=self.progress, chunker_params=self.chunker_params, cache=self.cache, + checkpoint_interval=0) + archive.rewrite_partial_chunks = None + return archive + + def open_archive(self, name, **kwargs): + return Archive(self.repository, self.key, self.manifest, name, cache=self.cache, **kwargs) diff --git a/borg/archiver.py 
b/borg/archiver.py index 5868c3082d7..a1868e0bf4e 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ get_cache_dir, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ - EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter + EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter, DASHES from .logger import create_logger, setup_logging logger = create_logger() from .compress import Compressor, COMPR_BUFFER @@ -29,7 +29,7 @@ from .repository import Repository from .cache import Cache from .key import key_creator, RepoKey, PassphraseKey -from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS +from .archive import Archive, ArchiveChecker, ArchiveRewriter, CHUNKER_PARAMS from .remote import RepositoryServer, RemoteRepository, cache_if_remote has_lchflags = hasattr(os, 'lchflags') @@ -37,8 +37,6 @@ # default umask, overriden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 -DASHES = '-' * 78 - def argument(args, str_or_bool): """If bool is passed, return it. If str is passed, retrieve named attribute from args.""" @@ -396,7 +394,7 @@ def item_is_hardlink_master(item): filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])): orig_path = item[b'path'] if item_is_hardlink_master(item): - hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source')) + hardlink_masters[orig_path] = (item.get(b'chunks'), None) if not matcher.match(item[b'path']): continue if strip_components: @@ -738,6 +736,47 @@ def do_upgrade(self, args): print("warning: %s" % e) return self.exit_code + @with_repository(cache=True, exclusive=True) + def do_rewrite(self, args, repository, manifest, key, cache): + """Rewrite archive contents""" + def interrupt(signal_num, stack_frame): + if rewriter.interrupt: + print("Received signal, again. 
I'm not deaf.\n", file=sys.stderr)
+            else:
+                print("Received signal, will exit cleanly.\n", file=sys.stderr)
+            rewriter.interrupt = True
+
+        matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
+
+        rewriter = ArchiveRewriter(repository, manifest, key, cache, matcher,
+                                   exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
+                                   keep_tag_files=args.keep_tag_files,
+                                   compression=args.compression, chunker_params=args.chunker_params,
+                                   progress=args.progress, stats=args.stats,
+                                   list=args.output_list, dry_run=args.dry_run)
+
+        signal.signal(signal.SIGTERM, interrupt)
+        signal.signal(signal.SIGINT, interrupt)
+
+        if args.location.archive:
+            name = args.location.archive
+            if rewriter.is_temporary_archive(name):
+                self.print_error('Refusing to rewrite temporary archive of prior rewrite: %s', name)
+                return self.exit_code
+            rewriter.rewrite(name)
+        else:
+            for archive in manifest.list_archive_infos(sort_by='ts'):
+                name = archive.name
+                if rewriter.is_temporary_archive(name):
+                    continue
+                print('Rewriting', name)
+                if not rewriter.rewrite(name):
+                    break
+        manifest.write()
+        repository.commit()
+        cache.commit()
+        return self.exit_code
+
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -1514,6 +1553,102 @@ def build_parser(self, args=None, prog=None):
                                type=location_validator(archive=False),
                                help='path to the repository to be upgraded')
+        rewrite_epilog = textwrap.dedent("""
+        Rewrites the contents of existing archives.
+
+        --exclude, --exclude-from and PATH have the exact same semantics
+        as in borg create. If a PATH is specified, the rewritten archive will
+        only contain items matching that path and nothing else; PATH does *not*
+        merely restrict which part of the archive is reprocessed, everything
+        outside it is removed from the archive.
+
+        --compression recompresses all chunks. Due to how Borg stores compressed
+        size information, this might display incorrect size information for
+        archives that were not rewritten at the same time.
+        This does not cause any data loss. Use --force to also recompress chunks
+        that already use the specified compression algorithm.
+
+        --chunker-params will re-chunk all files in the archive; this can be
+        used to make upgraded Borg 0.xx or Attic archives deduplicate with
+        Borg 1.x archives.
+
+        Currently the only file status used for --list is 'I' (file/dir included in
+        the rewritten archive).
+
+        borg rewrite is signal safe. Send either SIGINT (Ctrl-C on most terminals) or
+        SIGTERM to request termination.
+
+        Use the *exact same* command line to resume the operation later; changing
+        excludes or paths will lead to inconsistencies (changed excludes will only
+        apply to newly processed files/dirs). Changing compression leads to incorrect
+        size information (which does not cause any data loss, but can be misleading).
+
+        USE WITH CAUTION. Permanent data loss by specifying incorrect patterns is possible.
+
+        Note: The archive under rewrite is only removed after the operation completes.
+              The archive that is built during the rewrite exists at the same time under
+              the original name with a .rewrite suffix.
+
+        Note: When recompressing or (especially) re-chunking, space usage can be substantial.
+
+        Note: This changes the archive ID.
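+
+        Examples (illustrative; the repository path and archive name are placeholders)::
+
+            # re-chunk all archives with the current default chunker parameters
+            borg rewrite --chunker-params default /path/to/repo
+
+            # recompress all archives using lz4
+            borg rewrite -C lz4 /path/to/repo
+
+            # keep only some/path in an archive, removing everything else from it
+            borg rewrite /path/to/repo::archivename some/path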
+ """) + subparser = subparsers.add_parser('rewrite', parents=[common_parser], + description=self.do_rewrite.__doc__, + epilog=rewrite_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='create backup') + subparser.set_defaults(func=self.do_rewrite) + subparser.add_argument('--list', dest='output_list', + action='store_true', default=False, + help='output verbose list of items (files, dirs, ...)') + subparser.add_argument('-p', '--progress', dest='progress', + action='store_true', default=False, + help='show progress display while rewriting archives') + subparser.add_argument('-f', '--force', dest='force_recompress', + action='store_true', default=False, + help='even recompress chunks already compressed with the algorithm set with ' + '--compression') + subparser.add_argument('-n', '--dry-run', dest='dry_run', + action='store_true', default=False, + help='do not change anything') + subparser.add_argument('-s', '--stats', dest='stats', + action='store_true', default=False, + help='print statistics at end') + subparser.add_argument('-e', '--exclude', dest='excludes', + type=parse_pattern, action='append', + metavar="PATTERN", help='exclude paths matching PATTERN') + subparser.add_argument('--exclude-from', dest='exclude_files', + type=argparse.FileType('r'), action='append', + metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + subparser.add_argument('--exclude-caches', dest='exclude_caches', + action='store_true', default=False, + help='exclude directories that contain a CACHEDIR.TAG file (' + 'http://www.brynosaurus.com/cachedir/spec.html)') + subparser.add_argument('--exclude-if-present', dest='exclude_if_present', + metavar='FILENAME', action='append', type=str, + help='exclude directories that contain the specified file') + subparser.add_argument('--keep-tag-files', dest='keep_tag_files', + action='store_true', default=False, + help='keep tag files of excluded caches/directories') + subparser.add_argument('-C', '--compression', dest='compression', + type=CompressionSpec, default=None, metavar='COMPRESSION', + help='select compression algorithm (and level): ' + 'none == no compression (default), ' + 'lz4 == lz4, ' + 'zlib == zlib (default level 6), ' + 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' + 'lzma == lzma (default level 6), ' + 'lzma,0 .. 
lzma,9 == lzma (with level 0..9).') + subparser.add_argument('--chunker-params', dest='chunker_params', + type=ChunkerParams, default=None, + metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', + help='specify the chunker parameters (or "default").') + subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', + type=location_validator(), + help='repository/archive to rewrite') + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, + help='paths to rewrite; patterns are supported') + subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') subparser.add_argument('--epilog-only', dest='epilog_only', diff --git a/borg/cache.py b/borg/cache.py index 452a154b6cb..4954bc52936 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -358,27 +358,29 @@ def legacy_cleanup(): self.do_cache = os.path.isdir(archive_path) self.chunks = create_master_idx(self.chunks) - def add_chunk(self, id, data, stats): + def add_chunk(self, id, data, stats, check_csize=False): if not self.txn_active: self.begin_txn() size = len(data) - if self.seen_chunk(id, size): + count, _, stored_csize = self.get_chunk(id, size) + if count and not check_csize: return self.chunk_incref(id, stats) data = self.key.encrypt(data) csize = len(data) - self.repository.put(id, data, wait=False) - self.chunks[id] = (1, size, csize) + if csize != stored_csize: + self.repository.put(id, data, wait=False) + self.chunks[id] = (count + 1, size, csize) stats.update(size, csize, True) return id, size, csize - def seen_chunk(self, id, size=None): - refcount, stored_size, _ = self.chunks.get(id, (0, None, None)) + def get_chunk(self, id, size=None): + refcount, stored_size, stored_csize = self.chunks.get(id, (0, None, None)) if size is not None and stored_size is not None and size != stored_size: # we already have a chunk with that id, but different size. # this is either a hash collision (unlikely) or corruption or a bug. - raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % ( - id, stored_size, size)) - return refcount + raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" 
% + (id, stored_size, size)) + return refcount, stored_size, stored_csize def chunk_incref(self, id, stats): if not self.txn_active: diff --git a/borg/compress.pyx b/borg/compress.pyx index 3bb88def7d2..4c1d4dcc77a 100644 --- a/borg/compress.pyx +++ b/borg/compress.pyx @@ -182,17 +182,21 @@ class Compressor: self.params = kwargs self.compressor = get_compressor(name, **self.params) - def compress(self, data): - return self.compressor.compress(data) - - def decompress(self, data): + @staticmethod + def detect(data): hdr = bytes(data[:2]) # detect() does not work with memoryview for cls in COMPRESSOR_LIST: if cls.detect(hdr): - return cls(**self.params).decompress(data) + return cls else: raise ValueError('No decompressor for this data found: %r.', data[:2]) + def compress(self, data): + return self.compressor.compress(data) + + def decompress(self, data): + return self.detect(data)(**self.params).decompress(data) + # a buffer used for (de)compression result, which can be slightly bigger # than the chunk buffer in the worst (incompressible data) case, add 10%: diff --git a/borg/helpers.py b/borg/helpers.py index a53023ae522..b8a0ff5e12a 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -17,6 +17,7 @@ import unicodedata import logging + from .logger import create_logger logger = create_logger() @@ -40,6 +41,8 @@ EXIT_WARNING = 1 # reached normal end of operation, but there were issues EXIT_ERROR = 2 # terminated abruptly, did not reach end of operation +DASHES = '-' * 78 + class Error(Exception): """Error base class""" @@ -491,6 +494,9 @@ def timestamp(s): def ChunkerParams(s): + if s.strip().lower() == "default": + from .archive import CHUNKER_PARAMS + return CHUNKER_PARAMS chunk_min, chunk_max, chunk_mask, window_size = s.split(',') if int(chunk_max) > 23: # do not go beyond 2**23 (8MB) chunk size now, @@ -1268,3 +1274,47 @@ def format_time(self, key, item): def time(self, key, item): return safe_timestamp(item.get(key) or item[b'mtime']) + + +class ChunkIteratorFileWrapper: + """File-like wrapper for chunk iterators""" + + def __init__(self, chunk_iterator): + self.chunk_iterator = chunk_iterator + self.chunk_offset = 0 + self.chunk = b'' + self.exhausted = False + + def _refill(self): + remaining = len(self.chunk) - self.chunk_offset + if not remaining: + try: + self.chunk = memoryview(next(self.chunk_iterator)) + except StopIteration: + self.exhausted = True + return 0 # EOF + self.chunk_offset = 0 + remaining = len(self.chunk) + return remaining + + def _read(self, nbytes): + if not nbytes: + return b'' + remaining = self._refill() + will_read = min(remaining, nbytes) + self.chunk_offset += will_read + return self.chunk[self.chunk_offset - will_read:self.chunk_offset] + + def read(self, nbytes): + parts = [] + while nbytes and not self.exhausted: + read_data = self._read(nbytes) + nbytes -= len(read_data) + parts.append(read_data) + return b''.join(parts) + + +def open_item(archive, item): + """Return file-like object for archived item (with chunks).""" + chunk_iterator = archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]) + return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/borg/key.py b/borg/key.py index 113214ab6bb..349938fc9a9 100644 --- a/borg/key.py +++ b/borg/key.py @@ -75,9 +75,12 @@ def id_hash(self, data): def encrypt(self, data): pass - def decrypt(self, id, data): + def decrypt(self, id, data, decompress=True): pass + def assert_chunk_id(self, id, data): + """raise IntegrityError if id doesn't match data""" + class PlaintextKey(KeyBase): TYPE = 0x02 @@ 
-99,14 +102,20 @@ def id_hash(self, data): def encrypt(self, data): return b''.join([self.TYPE_STR, self.compressor.compress(data)]) - def decrypt(self, id, data): + def decrypt(self, id, data, decompress=True): if data[0] != self.TYPE: raise IntegrityError('Invalid encryption envelope') + if not decompress: + return memoryview(data)[1:] data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') return data + def assert_chunk_id(self, id, data): + if sha256(data).digest() != id: + raise IntegrityError('Chunk id verification failed') + class AESKeyBase(KeyBase): """Common base class shared by KeyfileKey and PassphraseKey diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 7720bc5dd52..9700b2361bd 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -2,6 +2,7 @@ from configparser import ConfigParser import errno import os +import inspect from io import StringIO import random import stat @@ -17,7 +18,7 @@ import pytest from .. import xattr -from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP +from ..archive import Archive, ChunkBuffer, ArchiveRewriter, CHUNK_MAX_EXP from ..archiver import Archiver from ..cache import Cache from ..crypto import bytes_to_long, num_aes_blocks @@ -233,9 +234,6 @@ def cmd(self, *args, **kw): def create_src_archive(self, name): self.cmd('create', self.repository_location + '::' + name, src_dir) - -class ArchiverTestCase(ArchiverTestCaseBase): - def create_regular_file(self, name, size=0, contents=None): filename = os.path.join(self.input_path, name) if not os.path.exists(os.path.dirname(filename)): @@ -274,7 +272,8 @@ def create_test_files(self): # same for newer ubuntu and centos. # if this is supported just on specific platform, platform should be checked first, # so that the test setup for all tests using it does not fail here always for others. 
- # xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) + # xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', + # follow_symlinks=False) # FIFO node os.mkfifo(os.path.join(self.input_path, 'fifo1')) if has_lchflags: @@ -293,6 +292,8 @@ def create_test_files(self): have_root = False return have_root + +class ArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): have_root = self.create_test_files() self.cmd('init', self.repository_location) @@ -635,29 +636,56 @@ def test_extract_with_pattern(self): self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2") self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) - def test_exclude_caches(self): + def _create_test_caches(self): self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature') - self.cmd('create', '--exclude-caches', self.repository_location + '::test', 'input') + os.mkdir('input/cache3') + os.link('input/cache1/CACHEDIR.TAG', 'input/cache3/CACHEDIR.TAG') + + def _assert_test_caches(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1']) self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG']) - def test_exclude_tagged(self): + def test_exclude_caches(self): + self._create_test_caches() + self.cmd('create', '--exclude-caches', self.repository_location + '::test', 'input') + self._assert_test_caches() + + def test_rewrite_exclude_caches(self): + self._create_test_caches() + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('rewrite', '--exclude-caches', self.repository_location + '::test') + self._assert_test_caches() + + def _create_test_tagged(self): self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('tagged1/.NOBACKUP') self.create_regular_file('tagged2/00-NOBACKUP') self.create_regular_file('tagged3/.NOBACKUP/file2') - self.cmd('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input') + + def _assert_test_tagged(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3']) - def test_exclude_keep_tagged(self): + def test_exclude_tagged(self): + self._create_test_tagged() + self.cmd('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input') + self._assert_test_tagged() + + def test_rewrite_exclude_tagged(self): + self._create_test_tagged() + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('rewrite', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', + self.repository_location + '::test') + self._assert_test_tagged() + + def _create_test_keep_tagged(self): self.cmd('init', self.repository_location) self.create_regular_file('file0', size=1024) self.create_regular_file('tagged1/.NOBACKUP1') @@ -670,8 +698,8 @@ def test_exclude_keep_tagged(self): self.create_regular_file('taggedall/.NOBACKUP2') 
self.create_regular_file('taggedall/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') self.create_regular_file('taggedall/file4', size=1024) - self.cmd('create', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', - '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input') + + def _assert_test_keep_tagged(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file0', 'tagged1', 'tagged2', 'tagged3', 'taggedall']) @@ -681,6 +709,19 @@ def test_exclude_keep_tagged(self): self.assert_equal(sorted(os.listdir('output/input/taggedall')), ['.NOBACKUP1', '.NOBACKUP2', 'CACHEDIR.TAG', ]) + def test_exclude_keep_tagged(self): + self._create_test_keep_tagged() + self.cmd('create', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', + '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input') + self._assert_test_keep_tagged() + + def test_rewrite_exclude_keep_tagged(self): + self._create_test_keep_tagged() + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('rewrite', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', + '--exclude-caches', '--keep-tag-files', self.repository_location + '::test') + self._assert_test_keep_tagged() + def test_path_normalization(self): self.cmd('init', self.repository_location) self.create_regular_file('dir1/dir2/file', size=1024 * 80) @@ -850,6 +891,14 @@ def test_file_status(self): # https://borgbackup.readthedocs.org/en/latest/faq.html#i-am-seeing-a-added-status-for-a-unchanged-file self.assert_in("A input/file2", output) + def test_create_delete_inbetween(self): + # Regression test case for bugfix in archive creation when a prior archive was deleted + self.create_test_files() + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test1', 'input') + self.cmd('delete', self.repository_location + '::test1') + self.cmd('create', self.repository_location + '::test2', 'input') + def test_create_topical(self): now = time.time() self.create_regular_file('file1', size=1024 * 80) @@ -1111,12 +1160,223 @@ def test_debug_put_get_delete_obj(self): output = self.cmd('debug-delete-obj', self.repository_location, 'invalid') assert "is invalid" in output + def test_rewrite_basic(self): + self.create_test_files() + self.create_regular_file('dir2/file3', size=1024 * 80) + self.cmd('init', self.repository_location) + archive = self.repository_location + '::test0' + self.cmd('create', archive, 'input') + self.cmd('rewrite', archive, 'input/dir2', '-e', 'input/dir2/file3') + listing = self.cmd('list', '--short', archive) + assert 'file1' not in listing + assert 'dir2/file2' in listing + assert 'dir2/file3' not in listing + + def test_rewrite_subtree_hardlinks(self): + # This is essentially the same problem set as in test_extract_hardlinks + self._extract_hardlinks_setup() + self.cmd('create', self.repository_location + '::test2', 'input') + self.cmd('rewrite', self.repository_location + '::test', 'input/dir1') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + assert os.stat('input/dir1/hardlink').st_nlink == 2 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert os.stat('input/dir1/aaaa').st_nlink == 2 + assert os.stat('input/dir1/source2').st_nlink == 2 + with changedir('output'): + self.cmd('extract', 
self.repository_location + '::test2') + assert os.stat('input/dir1/hardlink').st_nlink == 4 + + def test_rewrite_rechunkify(self): + with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd: + fd.write(b'a' * 250) + fd.write(b'b' * 250) + self.cmd('init', self.repository_location) + self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input') + self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache') + list = self.cmd('list', self.repository_location + '::test1', 'input/large_file', + '--format', '{num_chunks} {unique_chunks}') + num_chunks, unique_chunks = map(int, list.split(' ')) + # test1 and test2 do not deduplicate + assert num_chunks == unique_chunks + self.cmd('rewrite', self.repository_location, '--chunker-params', 'default') + # test1 and test2 do deduplicate after rewrite + assert not int(self.cmd('list', self.repository_location + '::test1', 'input/large_file', + '--format', '{unique_chunks}')) + + def test_rewrite_recompress(self): + self.create_regular_file('compressible', size=10000) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + list = self.cmd('list', self.repository_location + '::test', 'input/compressible', + '--format', '{size} {csize}') + size, csize = map(int, list.split(' ')) + assert csize >= size + self.cmd('rewrite', self.repository_location, '-C', 'lz4') + list = self.cmd('list', self.repository_location + '::test', 'input/compressible', + '--format', '{size} {csize}') + size, csize = map(int, list.split(' ')) + assert csize < size + + def test_rewrite_dry_run(self): + self.create_regular_file('compressible', size=10000) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + archives_before = self.cmd('list', self.repository_location + '::test') + self.cmd('rewrite', self.repository_location, '-n', '-e', 'input/compressible') + archives_after = self.cmd('list', self.repository_location + '::test') + assert archives_after == archives_before + + def _rewrite_interrupt_patch(self, interrupt_after_n_1_files): + def interrupt(self, *args): + if interrupt_after_n_1_files: + self.interrupt = True + pi_save(self, *args) + else: + raise ArchiveRewriter.Interrupted + + def process_item_patch(*args): + return pi_call.pop(0)(*args) + + pi_save = ArchiveRewriter.process_item + pi_call = [pi_save] * interrupt_after_n_1_files + [interrupt] + return process_item_patch + + def _test_rewrite_interrupt(self, create_stale_temp, change_args, interrupt_early): + self.create_test_files() + self.create_regular_file('dir2/abcdef') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + process_files = 1 + if interrupt_early: + process_files = 0 + args = ['-sv', '--list'] + with patch.object(ArchiveRewriter, 'process_item', self._rewrite_interrupt_patch(process_files)): + self.cmd('rewrite', *args, self.repository_location, 'input/dir2') + assert 'test.rewrite' in self.cmd('list', self.repository_location) + if create_stale_temp: + # Create a test.rewrite.temp to see that it doesn't stumble over that + self.cmd('create', self.repository_location + '::test.rewrite.temp', '/dev/null') + if change_args: + with patch.object(sys, 'argv', sys.argv + ['non-forking tests don\'t use sys.argv']): + output = self.cmd('rewrite', *args, '-pC', 'lz4', self.repository_location, 'input/dir2') + else: + output = self.cmd('rewrite', *args, 
self.repository_location, 'input/dir2') + assert 'Found test.rewrite, will resume' in output + assert change_args == ('Command line changed' in output) + assert create_stale_temp == ('Found temporary replay-archive' in output) + if not interrupt_early: + assert 'Fast-forwarded to input/dir2/abcdef' in output + assert 'I input/dir2/abcdef' not in output + assert 'I input/dir2/file2' in output + archives = self.cmd('list', self.repository_location) + assert 'test.rewrite' not in archives + assert 'test' in archives + files = self.cmd('list', self.repository_location + '::test') + assert 'dir2/file2' in files + assert 'dir2/abcdef' in files + assert 'file1' not in files + + def test_rewrite_interrupt(self): + # Again, this would be a job for pytest fixtures + self.tearDown() + for create_stale_temp, change_args, interrupt_early in [ + # don't test all eight combinations, would take too long + (False, True, True), + (False, False, True), + (True, True, False), + (True, False, False), + ]: + self.setUp() + try: + # In _test_rewrite_interrupt --inplace is only hit with change_args + self._test_rewrite_interrupt(create_stale_temp, change_args, interrupt_early) + finally: + self.tearDown() + self.setUp() + + @pytest.mark.skip() + def test_rewrite_interrupt_full(self): + # Again, this would be a job for pytest fixtures + self.tearDown() + for create_stale_temp in (True, False): + for change_args in (True, False): + for interrupt_early in (True, False): + self.setUp() + try: + self._test_rewrite_interrupt(create_stale_temp, change_args, interrupt_early) + finally: + self.tearDown() + self.setUp() + + def _test_rewrite_chunker_interrupt_patch(self, n_chunks): + real_add_chunk = Cache.add_chunk + + def add_chunk(*args, **kwargs): + nonlocal n_chunks + if n_chunks <= 0: + frame = inspect.stack()[2] + try: + caller_self = frame.frame.f_locals['self'] + if caller_self.__class__ is ArchiveRewriter: + caller_self.interrupt = True + finally: + del frame + n_chunks -= 1 + return real_add_chunk(*args, **kwargs) + return add_chunk + + def test_rewrite_rechunkify_interrupt(self): + self.create_test_files() + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') + with patch.object(Cache, 'add_chunk', self._test_rewrite_chunker_interrupt_patch(1)): + self.cmd('rewrite', '-p', '--chunker-params', '16,18,17,4095', self.repository_location) + assert 'test.rewrite' in self.cmd('list', self.repository_location) + output = self.cmd('rewrite', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location) + assert 'Found test.rewrite, will resume' in output + assert 'Copied 1 chunks from a partially processed item' in output + archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') + assert archive_after == archive_before + + def test_rewrite_changed_source(self): + self.create_test_files() + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + with patch.object(ArchiveRewriter, 'process_item', self._rewrite_interrupt_patch(1)): + self.cmd('rewrite', self.repository_location, 'input/dir2') + assert 'test.rewrite' in self.cmd('list', self.repository_location) + self.cmd('delete', self.repository_location + '::test') + self.cmd('create', self.repository_location + '::test', 'input') + output = self.cmd('rewrite', self.repository_location, 'input/dir2') + 
assert 'Source archive changed, will discard test.rewrite and start over' in output + + def test_rewrite_refuses_temporary(self): + self.cmd('init', self.repository_location) + self.cmd('rewrite', self.repository_location + '::cba.rewrite', exit_code=2) + self.cmd('rewrite', self.repository_location + '::abc.rewrite.temp', exit_code=2) + + + @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') class ArchiverTestCaseBinary(ArchiverTestCase): EXE = 'borg.exe' FORK_DEFAULT = True + @unittest.skip('patches objects') + def test_rewrite_rechunkify_interrupt(self): + ... + + @unittest.skip('patches objects') + def test_rewrite_interrupt(self): + ... + + @unittest.skip('patches objects') + def test_rewrite_changed_source(self): + ... + class ArchiverCheckTestCase(ArchiverTestCaseBase): @@ -1232,9 +1492,6 @@ def test_debug_put_get_delete_obj(self): class DiffArchiverTestCase(ArchiverTestCaseBase): - create_test_files = ArchiverTestCase.create_test_files - create_regular_file = ArchiverTestCase.create_regular_file - def test_basic_functionality(self): self.create_test_files() self.cmd('init', self.repository_location) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 3a3f2361d49..a3de5c101b1 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -15,7 +15,7 @@ yes, TRUISH, FALSISH, DEFAULTISH, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ - PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format + PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper from . import BaseTestCase, environment_variable, FakeInputs @@ -885,3 +885,14 @@ def test_partial_format(): assert partial_format('{unknown_key}', {}) == '{unknown_key}' assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}' assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}' + + +def test_chunk_file_wrapper(): + cfw = ChunkIteratorFileWrapper(iter([b'abc', b'def'])) + assert cfw.read(2) == b'ab' + assert cfw.read(50) == b'cdef' + assert cfw.exhausted + + cfw = ChunkIteratorFileWrapper(iter([])) + assert cfw.read(2) == b'' + assert cfw.exhausted diff --git a/docs/usage.rst b/docs/usage.rst index 70db163d696..f935c7664c2 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -595,6 +595,15 @@ Examples no key file found for repository + +.. include:: usage/rewrite.rst.inc + +Examples +~~~~~~~~ +:: + + TODO/later + Miscellaneous Help ------------------ diff --git a/docs/usage/rewrite.rst.inc b/docs/usage/rewrite.rst.inc new file mode 100644 index 00000000000..6199d749214 --- /dev/null +++ b/docs/usage/rewrite.rst.inc @@ -0,0 +1,103 @@ +.. 
_borg_rewrite:
+
+borg rewrite
+------------
+::
+
+    usage: borg rewrite [-h] [-v] [--debug] [--lock-wait N] [--show-version]
+                        [--show-rc] [--no-files-cache] [--umask M]
+                        [--remote-path PATH] [--list] [-p] [-f] [-n] [-s]
+                        [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                        [--exclude-caches] [--exclude-if-present FILENAME]
+                        [--keep-tag-files] [-C COMPRESSION]
+                        [--timestamp yyyy-mm-ddThh:mm:ss]
+                        [--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE]
+                        [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
+
+    Rewrite archive contents
+
+    positional arguments:
+      REPOSITORY_OR_ARCHIVE
+                            repository/archive to rewrite
+      PATH                  paths to rewrite; patterns are supported
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -v, --verbose, --info
+                            enable informative (verbose) output, work on log level
+                            INFO
+      --debug               enable debug output, work on log level DEBUG
+      --lock-wait N         wait for the lock, but max. N seconds (default: 1).
+      --show-version        show/log the borg version
+      --show-rc             show/log the return code (rc)
+      --no-files-cache      do not load/update the file metadata cache used to
+                            detect unchanged files
+      --umask M             set umask to M (local and remote, default: 0077)
+      --remote-path PATH    set remote path to executable (default: "borg")
+      --list                output verbose list of items (files, dirs, ...)
+      -p, --progress        show progress display while rewriting archives
+      -f, --force           even recompress chunks already compressed with the
+                            algorithm set with --compression
+      -n, --dry-run         do not change anything
+      -s, --stats           print statistics at end
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
+      --exclude-caches      exclude directories that contain a CACHEDIR.TAG file
+                            (http://www.brynosaurus.com/cachedir/spec.html)
+      --exclude-if-present FILENAME
+                            exclude directories that contain the specified file
+      --keep-tag-files      keep tag files of excluded caches/directories
+      -C COMPRESSION, --compression COMPRESSION
+                            select compression algorithm (and level): none == no
+                            compression (default), lz4 == lz4, zlib == zlib
+                            (default level 6), zlib,0 .. zlib,9 == zlib (with
+                            level 0..9), lzma == lzma (default level 6), lzma,0 ..
+                            lzma,9 == lzma (with level 0..9).
+      --timestamp yyyy-mm-ddThh:mm:ss
+                            manually specify the archive creation date/time (UTC).
+                            alternatively, give a reference file/directory.
+      --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
+                            specify the chunker parameters (or "default").
+
+Description
+~~~~~~~~~~~
+
+Rewrites the contents of existing archives.
+
+--exclude, --exclude-from and PATH have the exact same semantics
+as in borg create. If a PATH is specified, the rewritten archive will
+only contain items matching that path and nothing else; PATH does *not*
+merely restrict which part of the archive is reprocessed, everything
+outside it is removed from the archive.
+
+--compression recompresses all chunks. Due to how Borg stores compressed
+size information, this might display incorrect size information for
+archives that were not rewritten at the same time.
+This does not cause any data loss. Use --force to also recompress chunks
+that already use the specified compression algorithm.
+
+--chunker-params will re-chunk all files in the archive; this can be
+used to make upgraded Borg 0.xx or Attic archives deduplicate with
+Borg 1.x archives.
+
+Currently the only file status used for --list is 'I' (file/dir included in
+the rewritten archive).
+
+borg rewrite is signal safe. Send either SIGINT (Ctrl-C on most terminals) or
+SIGTERM to request termination.
+
+Use the *exact same* command line to resume the operation later; changing
+excludes or paths will lead to inconsistencies (changed excludes will only
+apply to newly processed files/dirs). Changing compression leads to incorrect
+size information (which does not cause any data loss, but can be misleading).
+
+USE WITH CAUTION. Permanent data loss by specifying incorrect patterns is possible.
+
+Note: The archive under rewrite is only removed after the operation completes.
+      The archive that is built during the rewrite exists at the same time under
+      the original name with a .rewrite suffix.
+
+Note: When recompressing or (especially) re-chunking, space usage can be substantial.
+
+Note: This changes the archive ID.
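+
+Examples (illustrative; the repository path and archive name are placeholders)::
+
+    # re-chunk all archives with the current default chunker parameters
+    borg rewrite --chunker-params default /path/to/repo
+
+    # recompress all archives using lz4
+    borg rewrite -C lz4 /path/to/repo
+
+    # keep only some/path in an archive, removing everything else from it
+    borg rewrite /path/to/repo::archivename some/path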