|
2 | 2 | # Copyright (c) Jupyter Development Team.
|
3 | 3 | # Distributed under the terms of the Modified BSD License.
|
4 | 4 | import errno
|
| 5 | +import math |
5 | 6 | import mimetypes
|
6 | 7 | import os
|
7 | 8 | import shutil
|
8 | 9 | import stat
|
| 10 | +import subprocess |
9 | 11 | import sys
|
10 | 12 | import warnings
|
11 | 13 | from datetime import datetime
|
|
25 | 27 |
|
26 | 28 | from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
|
27 | 29 | from .fileio import AsyncFileManagerMixin, FileManagerMixin
|
28 |
| -from .manager import AsyncContentsManager, ContentsManager |
| 30 | +from .manager import AsyncContentsManager, ContentsManager, copy_pat |
29 | 31 |
|
30 | 32 | try:
|
31 | 33 | from os.path import samefile
|
@@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None):
|
602 | 604 | parent_dir = ""
|
603 | 605 | return parent_dir
|
604 | 606 |
|
def copy(self, from_path: str, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Files are handed to the parent class ``copy``. Directories are first
    checked with ``check_folder_size`` and then copied recursively by
    ``_copy_dir``.

    from_path must be a full path to a file or directory.
    If to_path is not specified, the copy is created in the parent
    directory of from_path.

    For directories, any match of ``copy_pat`` in the source name is replaced
    by a dot, and when the destination directory exists the final name is
    chosen by ``increment_filename`` with the ``-Copy`` insert.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # "a/b/c" -> ("a/b", "c"); a bare name yields an empty parent directory.
    from_dir, _, from_name = path.rpartition("/")

    model = self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    # Resolve the destination directory once. The previous code passed
    # str(to_path) to _copy_dir, which is the literal string "None" when no
    # destination is given, so the tree was copied into a folder named "None".
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(dest_dir):
        to_name = super().increment_filename(to_name, dest_dir, insert="-Copy")

    return self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=f"{dest_dir}/{to_name}",
    )
| 650 | + |
def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str):
    """
    Recursively copy a directory on disk and return the model of the copy.

    ``from_path`` and ``to_path_original`` are API paths resolved through
    ``_get_os_path``; the tree is written to ``to_name`` inside the resolved
    ``to_path_original``. ``to_path`` is the API path used to fetch the
    resulting model (without content).

    Raises ``web.HTTPError`` (400) when an ``OSError`` occurs.
    """
    try:
        source = self._get_os_path(from_path.strip("/"))
        parent = self._get_os_path(to_path_original.strip("/"))
        destination = f"{parent}/{to_name}"
        shutil.copytree(source, destination)
        return self.get(to_path, content=False)
    except OSError as err:
        self.log.error(f"OSError in _copy_dir: {err}")
        message = f"Can't copy '{from_path}' into Folder '{to_path}'"
        raise web.HTTPError(400, message) from err
| 669 | + |
def check_folder_size(self, path: str, limit_mb: int = 100):
    """
    Limit the size of folders being copied to prevent a timeout error.

    ``path`` is an API path; it is resolved with ``_get_os_path`` and measured
    with ``_get_dir_size``. ``limit_mb`` is the largest allowed size in MiB
    (1024 * 1024 bytes) and defaults to the previously hard-coded 100.

    Returns ``None`` when the folder is within the limit and raises
    ``web.HTTPError`` (400) when it is larger.
    """
    limit_bytes = limit_mb * 1024 * 1024
    # _get_dir_size reports the byte count as a string.
    size = int(self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        # Single-line message: the old triple-quoted f-string leaked newlines
        # and source indentation into the HTTP error text.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {limit_mb}MB, "
            f'"{path}" is {self._human_readable_size(size)}',
        )
| 686 | + |
def _get_dir_size(self, path: str = "."):
    """
    Return the size in bytes of the directory at ``path`` as a decimal string.

    The command line program ``du`` is tried first. ``--block-size`` is a GNU
    option, so when ``du`` is missing, rejects the option, or prints nothing
    usable, the size is computed in Python by summing the sizes of all files
    below ``path`` (symlinks are not followed).

    Raises ``web.HTTPError`` (400) when ``path`` cannot be measured at all.
    """

    def _du_size():
        # Returns du's byte count as a digit string, or None when unusable.
        try:
            result = subprocess.run(
                ["du", "-s", "--block-size=1", path], capture_output=True
            ).stdout.split()
            self.log.info(f"current status of du command {result}")
            size = result[0].decode("utf-8")
        except (OSError, ValueError, IndexError) as err:
            self.log.warning("du could not measure %s: %r", path, err)
            return None
        return size if size.isdigit() else None

    def _walk_size():
        # Portable fallback: total st_size of every file below path.
        if not os.path.isdir(path):
            return os.lstat(path).st_size
        total = 0
        for parent, _dirs, files in os.walk(path):
            for name in files:
                try:
                    total += os.lstat(os.path.join(parent, name)).st_size
                except OSError:
                    # File vanished or is unreadable; skip it like du would.
                    continue
        return total

    size = _du_size()
    if size is not None:
        return size
    try:
        return str(_walk_size())
    except OSError as err:
        self.log.error(f"Error during directory copy: {err}")
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err
| 707 | + |
def _human_readable_size(self, size: int):
    """
    Return ``size`` (a byte count) in a human readable format.

    The value is scaled by powers of 1024 and formatted with up to four
    significant digits, e.g. ``1536`` -> ``"1.5 KB"``. Sizes beyond the
    largest known unit are expressed in that unit instead of failing.
    """
    if size == 0:
        return "0 Bytes"

    units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
    # floor(log2(size) / 10) in exact integer arithmetic; float log2 can round
    # values just below a power of 1024 up into the next unit.
    order = (int(abs(size)).bit_length() - 1) // 10
    # Clamp so fractional sizes stay in Bytes and huge sizes stay in PB
    # (the unclamped index raised IndexError from 1024**6 upwards).
    order = max(0, min(order, len(units) - 1))

    return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
| 719 | + |
605 | 720 |
|
606 | 721 | class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
|
607 | 722 | """An async file contents manager."""
|
@@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None):
|
959 | 1074 | else:
|
960 | 1075 | parent_dir = ""
|
961 | 1076 | return parent_dir
|
| 1077 | + |
async def copy(self, from_path: str, to_path=None) -> dict:
    """
    Copy an existing file or directory and return its new model.

    Files are handed to ``AsyncContentsManager.copy``. Directories are first
    checked with ``check_folder_size`` and then copied recursively by
    ``_copy_dir``.

    from_path must be a full path to a file or directory.
    If to_path is not specified, the copy is created in the parent
    directory of from_path.

    For directories, any match of ``copy_pat`` in the source name is replaced
    by a dot, and when the destination directory exists the final name is
    chosen by ``increment_filename`` with the ``-Copy`` insert.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # "a/b/c" -> ("a/b", "c"); a bare name yields an empty parent directory.
    from_dir, _, from_name = path.rpartition("/")

    model = await self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    # Resolve the destination directory once. The previous code passed
    # str(to_path) to _copy_dir, which is the literal string "None" when no
    # destination is given, so the tree was copied into a folder named "None".
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(dest_dir):
        to_name = await super().increment_filename(to_name, dest_dir, insert="-Copy")

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=f"{dest_dir}/{to_name}",
    )
| 1121 | + |
async def _copy_dir(
    self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict:
    """
    Recursively copy a directory on disk and return the model of the copy.

    ``from_path`` and ``to_path_original`` are API paths resolved through
    ``_get_os_path``; the tree is written to ``to_name`` inside the resolved
    ``to_path_original``. ``to_path`` is the API path used to fetch the
    resulting model (without content).

    Raises ``web.HTTPError`` (400) when an ``OSError`` occurs.
    """
    # Local import keeps this block self-contained.
    import asyncio

    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
        # copytree is blocking file I/O; run it in the default executor so
        # the event loop keeps serving other requests during a large copy.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copytree, os_from_path, os_to_path)
        model = await self.get(to_path, content=False)
    except OSError as err:
        self.log.error(f"OSError in _copy_dir: {err}")
        # Any OSError lands here (existing target, full disk, permissions),
        # so the message no longer claims the folder is read-only.
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into Folder '{to_path}'",
        ) from err

    return model
| 1142 | + |
async def check_folder_size(self, path: str, limit_mb: int = 100) -> None:
    """
    Limit the size of folders being copied to prevent a timeout error.

    ``path`` is an API path; it is resolved with ``_get_os_path`` and measured
    with ``_get_dir_size``. ``limit_mb`` is the largest allowed size in MiB
    (1024 * 1024 bytes) and defaults to the previously hard-coded 100.

    Returns ``None`` when the folder is within the limit and raises
    ``web.HTTPError`` (400) when it is larger.
    """
    limit_bytes = limit_mb * 1024 * 1024
    # _get_dir_size reports the byte count as a string.
    size = int(await self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        readable = await self._human_readable_size(size)
        # Single-line message: the old triple-quoted f-string leaked newlines
        # and source indentation into the HTTP error text.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {limit_mb}MB, "
            f'"{path}" is {readable}',
        )
| 1160 | + |
async def _get_dir_size(self, path: str = ".") -> str:
    """
    Return the size in bytes of the directory at ``path`` as a decimal string.

    The command line program ``du`` is tried first. ``--block-size`` is a GNU
    option, so when ``du`` is missing, rejects the option, or prints nothing
    usable, the size is computed in Python by summing the sizes of all files
    below ``path`` (symlinks are not followed). The measurement runs in the
    default executor so the event loop is not blocked.

    Raises ``web.HTTPError`` (400) when ``path`` cannot be measured at all.
    """
    # Local import keeps this block self-contained.
    import asyncio

    def _du_size():
        # Returns du's byte count as a digit string, or None when unusable.
        try:
            result = subprocess.run(
                ["du", "-s", "--block-size=1", path], capture_output=True
            ).stdout.split()
            self.log.info(f"current status of du command {result}")
            size = result[0].decode("utf-8")
        except (OSError, ValueError, IndexError) as err:
            self.log.warning("du could not measure %s: %r", path, err)
            return None
        return size if size.isdigit() else None

    def _walk_size():
        # Portable fallback: total st_size of every file below path.
        if not os.path.isdir(path):
            return os.lstat(path).st_size
        total = 0
        for parent, _dirs, files in os.walk(path):
            for name in files:
                try:
                    total += os.lstat(os.path.join(parent, name)).st_size
                except OSError:
                    # File vanished or is unreadable; skip it like du would.
                    continue
        return total

    def _measure():
        size = _du_size()
        return size if size is not None else str(_walk_size())

    try:
        return await asyncio.get_running_loop().run_in_executor(None, _measure)
    except OSError as err:
        self.log.error(f"Error during directory copy: {err}")
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err
| 1181 | + |
async def _human_readable_size(self, size: int) -> str:
    """
    Return ``size`` (a byte count) in a human readable format.

    The value is scaled by powers of 1024 and formatted with up to four
    significant digits, e.g. ``1536`` -> ``"1.5 KB"``. Sizes beyond the
    largest known unit are expressed in that unit instead of failing.
    """
    if size == 0:
        return "0 Bytes"

    units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
    # floor(log2(size) / 10) in exact integer arithmetic; float log2 can round
    # values just below a power of 1024 up into the next unit.
    order = (int(abs(size)).bit_length() - 1) // 10
    # Clamp so fractional sizes stay in Bytes and huge sizes stay in PB
    # (the unclamped index raised IndexError from 1024**6 upwards).
    order = max(0, min(order, len(units) - 1))

    return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
0 commit comments