From f890ef4649dc7ad6e090614732a34a03946ac863 Mon Sep 17 00:00:00 2001 From: mmbugua Date: Fri, 27 Jan 2023 09:59:27 -0800 Subject: [PATCH 01/12] Enable users to copy both files and directories --- .../services/contents/filemanager.py | 233 +++++++++++++++++- tests/services/contents/test_api.py | 33 ++- tests/services/contents/test_manager.py | 94 +++++++ 3 files changed, 347 insertions(+), 13 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 53b3c34d62..b9bfb06863 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -2,10 +2,12 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import errno +import math import mimetypes import os import shutil import stat +import subprocess import sys import warnings from datetime import datetime @@ -25,7 +27,7 @@ from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from .fileio import AsyncFileManagerMixin, FileManagerMixin -from .manager import AsyncContentsManager, ContentsManager +from .manager import AsyncContentsManager, ContentsManager, copy_pat try: from os.path import samefile @@ -600,6 +602,119 @@ def get_kernel_path(self, path, model=None): parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir + def copy(self, from_path: str, to_path=None): + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + self.check_folder_size(path) + else: + # let the super class handle copying files + return super().copy(from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str): + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into Folder '{to_path}'", + ) from err + + return model + + def check_folder_size(self, path: str): + """ + limit the size of folders being copied to prevent a timeout error + """ + limit_mb = 100 + limit_str = f"{limit_mb}MB" + limit_bytes = limit_mb * 1024 * 1024 + size = int(self._get_dir_size(self._get_os_path(path))) + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {limit_str}, + "{path}" is {self._human_readable_size(size)} + """, + ) + + def _get_dir_size(self, path: str = "."): + """ + calls the command line program du to get the directory size + """ + try: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + def _human_readable_size(self, size: int): + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) + class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): """An async file contents manager.""" @@ -955,3 +1070,119 @@ async def get_kernel_path(self, path, model=None): return path parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir + + async def copy(self, from_path: str, to_path=None) -> dict: + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = await self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + await self.check_folder_size(path) + else: + # let the super class handle copying files + return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if await self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = await super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return await self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + async def _copy_dir( + self, from_path: str, to_path_original: str, to_name: str, to_path: str + ) -> dict: + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = await self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into read-only Folder '{to_path}'", + ) from err + + return model + + async def check_folder_size(self, path: str) -> None: + """ + limit the size of folders being copied to prevent a timeout error + + """ + limit_mb = 100 + limit_str = f"{limit_mb}MB" + limit_bytes = limit_mb * 1024 * 1024 + size = int(await self._get_dir_size(self._get_os_path(path))) + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {limit_str}, + "{path}" is {await self._human_readable_size(size)} + """, + ) + + async def _get_dir_size(self, path: str = ".") -> str: + """ + calls the command line program du to get the directory size + """ + try: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + async def _human_readable_size(self, size: int) -> str: + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index e95ae11255..252f368985 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created): _check_created(r, str(contents_dir), path, copy3, type="notebook") +async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created): + # created a nest copy of a the original folder + dest_dir = "foo" + path = "parent" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, dest_dir, type="directory") + + # copy to a folder where a similar name exists + dest_dir = "foo" + path = "parent" + copy_dir = f"{dest_dir}-Copy1" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, copy_dir, type="directory") + + async def test_copy_path(jp_fetch, contents, contents_dir, _check_created): path1 = "foo" path2 = "å b" @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden( assert expected_http_error(e, 400) -async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created): - with pytest.raises(tornado.httpclient.HTTPClientError) as e: - await jp_fetch( - "api", - "contents", - "foo", - method="POST", - body=json.dumps({"copy_from": "å b"}), - ) - assert expected_http_error(e, 400) - - @pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows") async def test_copy_400_hidden( jp_fetch, diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index ab6960226b..ebdf0fa8d4 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -1,4 +1,5 @@ import os +import shutil import sys import time from itertools import combinations @@ -52,6 +53,52 @@ def _make_dir(jp_contents_manager, api_path): print("Directory already exists: %r" % os_path) +def _make_big_dir(contents_manager, api_path): + # make a directory that is over 100 MB in size + os_path = contents_manager._get_os_path(api_path) + try: + os.makedirs(os_path) + # textFile = open(f"{os_path}/demofile.txt", "a") + # textFile.write( + # """ + # Lorem ipsum dolor sit amet, consectetur adipiscing elit, + # sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + # Ut enim ad minim veniam, quis nostrud exercitation ullamco + # laboris nisi ut aliquip ex ea commodo consequat. + # Duis aute irure dolor in reprehenderit in voluptate + # velit esse cillum dolore eu fugiat nulla pariatur. + # Excepteur sint occaecat cupidatat non proident, + # sunt in culpa qui officia deserunt mollit anim id est laborum. + # """ + # ) + # textFile.close() + + with open(f"{os_path}/demofile.txt", "a") as textFile: + textFile.write( + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, + sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco + laboris nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate + velit esse cillum dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, + sunt in culpa qui officia deserunt mollit anim id est laborum. + """ + ) + + for i in range(200): + os.makedirs(f"{os_path}/subfolder-{i}") + for j in range(200): + shutil.copy( + f"{os_path}/demofile.txt", + f"{os_path}/subfolder-{i}/testfile{j}.txt", + ) + + except OSError as err: + print("Directory already exists", err) + + def symlink(jp_contents_manager, src, dst): """Make a symlink to src from dst @@ -815,6 +862,53 @@ async def test_copy(jp_contents_manager): assert copy3["path"] == "copy 3.ipynb" +async def test_copy_dir(jp_contents_manager): + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + nonExistantDir = "FolderDoesNotExist" + + _make_dir(cm, destDir) + _make_dir(cm, sourceDir) + + nestedDir = f"{destDir}/{sourceDir}" + + # copy one folder insider another folder + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == nestedDir + + # need to test when copying in a directory where the another folder with the same name exists + _make_dir(cm, nestedDir) + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == f"{nestedDir}-Copy1" + + # need to test for when copying in the same path as the sourceDir + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path="")) + assert copy["path"] == f"{sourceDir}-Copy1" + + # ensure its still possible to copy a folder to another folder that doesn't exist + copy = await ensure_async( + cm.copy( + from_path=sourceDir, + to_path=nonExistantDir, + ) + ) + assert copy["path"] == f"{nonExistantDir}/{sourceDir}" + + +async def test_copy_big_dir(jp_contents_manager): + # this tests how the Content API limits prevents copying folders that more than 100MB in size + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + _make_dir(cm, destDir) + _make_big_dir(contents_manager=cm, api_path=sourceDir) + with pytest.raises(HTTPError) as exc_info: + await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + + assert exc_info.type is HTTPError + + async def test_mark_trusted_cells(jp_contents_manager): cm = jp_contents_manager nb, name, path = await new_notebook(cm) From 57200e86d3ce2044669e526bcd0feb35789b456c Mon Sep 17 00:00:00 2001 From: mmbugua Date: Thu, 2 Feb 2023 22:34:30 -0800 Subject: [PATCH 02/12] Fix issues for windows test for the test_copy_big_dir --- tests/services/contents/test_manager.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index ebdf0fa8d4..c0201c9d73 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -1,4 +1,5 @@ import os +import platform import shutil import sys import time @@ -87,7 +88,17 @@ def _make_big_dir(contents_manager, api_path): """ ) - for i in range(200): + # since shutil.copy in Windows can't copy the metadata for files + # the file size of the copied file ends up being smaller than the original + # we only want to increase the number of folder if the tests is being run on + # windows, otherwise the for loop doesn't need to run as long as this + # function is already slow enough + if platform.system() == "Windows": + num_sub_folders = 500 + else: + num_sub_folders = 200 + + for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") for j in range(200): shutil.copy( From 7fc89fb846033f97a2a7f8b8aaf4bb199e9849fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Feb 2023 06:34:51 +0000 Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/services/contents/test_manager.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index c0201c9d73..89a52cb22b 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -93,10 +93,7 @@ def _make_big_dir(contents_manager, api_path): # we only want to increase the number of folder if the tests is being run on # windows, otherwise the for loop doesn't need to run as long as this # function is already slow enough - if platform.system() == "Windows": - num_sub_folders = 500 - else: - num_sub_folders = 200 + num_sub_folders = 500 if platform.system() == "Windows" else 200 for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") From aa8b2527170a4c40de5fc84e6cced55d5e013579 Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Thu, 9 Feb 2023 16:07:51 -0800 Subject: [PATCH 04/12] Fix tests for macos platform --- .../services/contents/filemanager.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index b9bfb06863..3911f7a8b6 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -5,6 +5,7 @@ import math import mimetypes import os +import platform import shutil import stat import subprocess @@ -673,6 +674,8 @@ def check_folder_size(self, path: str): limit_str = f"{limit_mb}MB" limit_bytes = limit_mb * 1024 * 1024 size = int(self._get_dir_size(self._get_os_path(path))) + size = size * 1024 if platform.system() == "Darwin" else size + if size > limit_bytes: raise web.HTTPError( 400, @@ -687,9 +690,19 @@ def _get_dir_size(self, path: str = "."): calls the command line program du to get the directory size """ try: - result = subprocess.run( - ["du", "-s", "--block-size=1", path], capture_output=True - ).stdout.split() + # result = subprocess.run( + # ["du", "-s", "--block-size=1", path], capture_output=True + # ).stdout.split() + # self.log.info(f"current status of du command {result}") + # size = result[0].decode("utf-8") + if platform.system() == "Darwin": + result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() + + else: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") size = result[0].decode("utf-8") except Exception as err: @@ -1145,6 +1158,7 @@ async def check_folder_size(self, path: str) -> None: limit_str = f"{limit_mb}MB" limit_bytes = limit_mb * 1024 * 1024 size = int(await self._get_dir_size(self._get_os_path(path))) + size = size * 1024 if platform.system() == "Darwin" else size if size > limit_bytes: raise web.HTTPError( 400, @@ -1159,9 +1173,18 @@ async def _get_dir_size(self, path: str = ".") -> str: calls the command line program du to get the directory size """ try: - result = subprocess.run( - ["du", "-s", "--block-size=1", path], capture_output=True - ).stdout.split() + # result = subprocess.run( + # ["du", "-s", "--block-size=1", path], capture_output=True + # ).stdout.split() + + if platform.system() == "Darwin": + result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() + + else: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") size = result[0].decode("utf-8") except Exception as err: From 80df3377f547533fd8378545a421cdac4b0bd37b Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Thu, 9 Feb 2023 18:29:06 -0800 Subject: [PATCH 05/12] Fix linting issues --- jupyter_server/services/contents/filemanager.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 3911f7a8b6..2bb4290e9c 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -603,7 +603,7 @@ def get_kernel_path(self, path, model=None): parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir - def copy(self, from_path: str, to_path=None): + def copy(self, from_path, to_path=None): """ Copy an existing file or directory and return its new model. If to_path not specified, it will be the parent directory of from_path. @@ -647,7 +647,7 @@ def copy(self, from_path: str, to_path=None): to_path=to_path, ) - def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str): + def _copy_dir(self, from_path, to_path_original, to_name, to_path): """ handles copying directories returns the model for the copied directory @@ -666,7 +666,7 @@ def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path return model - def check_folder_size(self, path: str): + def check_folder_size(self, path): """ limit the size of folders being copied to prevent a timeout error """ @@ -685,7 +685,7 @@ def check_folder_size(self, path: str): """, ) - def _get_dir_size(self, path: str = "."): + def _get_dir_size(self, path = "."): """ calls the command line program du to get the directory size """ @@ -716,7 +716,7 @@ def _get_dir_size(self, path: str = "."): ) from err return size - def _human_readable_size(self, size: int): + def _human_readable_size(self, size): """ returns folder size in a human readable format """ @@ -1084,7 +1084,7 @@ async def get_kernel_path(self, path, model=None): parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir - async def copy(self, from_path: str, to_path=None) -> dict: + async def copy(self, from_path, to_path=None): """ Copy an existing file or directory and return its new model. If to_path not specified, it will be the parent directory of from_path. From d7520ed2cf4a5d2975b05389752d07f9b2d60bd2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Feb 2023 02:29:29 +0000 Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_server/services/contents/filemanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 2bb4290e9c..2dae125b31 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -685,7 +685,7 @@ def check_folder_size(self, path): """, ) - def _get_dir_size(self, path = "."): + def _get_dir_size(self, path="."): """ calls the command line program du to get the directory size """ From 8ab6fb540bb2d6b31bfebdf95531c0ff23bb0fa8 Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Sat, 11 Feb 2023 20:02:34 -0800 Subject: [PATCH 07/12] add max_copy_folder_size_mb trait and adjust tests --- jupyter_server/services/contents/filemanager.py | 7 +++++-- jupyter_server/services/contents/manager.py | 16 +++++++++++++++- tests/services/contents/test_manager.py | 11 +++++++---- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 2dae125b31..5baa2f11db 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -672,7 +672,8 @@ def check_folder_size(self, path): """ limit_mb = 100 limit_str = f"{limit_mb}MB" - limit_bytes = limit_mb * 1024 * 1024 + # limit_bytes = limit_mb * 1024 * 1024 + limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 size = int(self._get_dir_size(self._get_os_path(path))) size = size * 1024 if platform.system() == "Darwin" else size @@ -1156,7 +1157,9 @@ async def check_folder_size(self, path: str) -> None: """ limit_mb = 100 limit_str = f"{limit_mb}MB" - limit_bytes = limit_mb * 1024 * 1024 + # limit_bytes = limit_mb * 1024 * 1024 + limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 + size = int(await self._get_dir_size(self._get_os_path(path))) size = size * 1024 if platform.system() == "Darwin" else size if size > limit_bytes: diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index c1e38cc914..1011b3e09d 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -15,7 +15,19 @@ from nbformat import validate as validate_nb from nbformat.v4 import new_notebook from tornado.web import HTTPError, RequestHandler -from traitlets import Any, Bool, Dict, Instance, List, TraitError, Type, Unicode, default, validate +from traitlets import ( + Any, + Bool, + Dict, + Instance, + Int, + List, + TraitError, + Type, + Unicode, + default, + validate, +) from traitlets.config.configurable import LoggingConfigurable from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI @@ -121,6 +133,8 @@ def _notary_default(self): """, ) + max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") + untitled_notebook = Unicode( _i18n("Untitled"), config=True, diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 89a52cb22b..ec9f048a36 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -91,9 +91,10 @@ def _make_big_dir(contents_manager, api_path): # since shutil.copy in Windows can't copy the metadata for files # the file size of the copied file ends up being smaller than the original # we only want to increase the number of folder if the tests is being run on - # windows, otherwise the for loop doesn't need to run as long as this - # function is already slow enough - num_sub_folders = 500 if platform.system() == "Windows" else 200 + # windows, + big_dir_size = contents_manager.max_copy_folder_size_mb * 3 + print(f"the size of the big folder for testing is {big_dir_size}") + num_sub_folders = big_dir_size if platform.system() == "Windows" else big_dir_size * 3 for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") @@ -905,10 +906,12 @@ async def test_copy_dir(jp_contents_manager): async def test_copy_big_dir(jp_contents_manager): - # this tests how the Content API limits prevents copying folders that more than 100MB in size + # this tests how the Content API limits preventing copying folders that are more than + # the size limit specified in max_copy_folder_size_mb trait cm = jp_contents_manager destDir = "Untitled Folder 1" sourceDir = "Morningstar Notebooks" + cm.max_copy_folder_size_mb = 5 _make_dir(cm, destDir) _make_big_dir(contents_manager=cm, api_path=sourceDir) with pytest.raises(HTTPError) as exc_info: From 79a0192c69d429ee7b7f77554b500adda02d4773 Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Sat, 11 Feb 2023 20:18:52 -0800 Subject: [PATCH 08/12] fixup! add max_copy_folder_size_mb trait and adjust tests --- tests/services/contents/test_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index ec9f048a36..c2d8d6d9e4 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -94,7 +94,7 @@ def _make_big_dir(contents_manager, api_path): # windows, big_dir_size = contents_manager.max_copy_folder_size_mb * 3 print(f"the size of the big folder for testing is {big_dir_size}") - num_sub_folders = big_dir_size if platform.system() == "Windows" else big_dir_size * 3 + num_sub_folders = big_dir_size if platform.system() != "Windows" else big_dir_size * 3 for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") From 8f0bdf3e7221a4d1fddd0ccf139fc255621586c1 Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Sat, 11 Feb 2023 21:04:42 -0800 Subject: [PATCH 09/12] fixup! add max_copy_folder_size_mb trait and adjust tests --- .../services/contents/filemanager.py | 30 +++++++------------ tests/services/contents/test_manager.py | 8 ++--- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 5baa2f11db..e1d94664aa 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -670,18 +670,16 @@ def check_folder_size(self, path): """ limit the size of folders being copied to prevent a timeout error """ - limit_mb = 100 - limit_str = f"{limit_mb}MB" - # limit_bytes = limit_mb * 1024 * 1024 limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 size = int(self._get_dir_size(self._get_os_path(path))) + # convert from KB to Bytes for macOS size = size * 1024 if platform.system() == "Darwin" else size if size > limit_bytes: raise web.HTTPError( 400, f""" - Can't copy folders larger than {limit_str}, + Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "{path}" is {self._human_readable_size(size)} """, ) @@ -691,14 +689,9 @@ def _get_dir_size(self, path="."): calls the command line program du to get the directory size """ try: - # result = subprocess.run( - # ["du", "-s", "--block-size=1", path], capture_output=True - # ).stdout.split() - # self.log.info(f"current status of du command {result}") - # size = result[0].decode("utf-8") if platform.system() == "Darwin": + # retuns the size of the folder in KB result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() - else: result = subprocess.run( ["du", "-s", "--block-size=1", path], capture_output=True @@ -1152,21 +1145,22 @@ async def _copy_dir( async def check_folder_size(self, path: str) -> None: """ - limit the size of folders being copied to prevent a timeout error + limit the size of folders being copied to be no more than the + trait max_copy_folder_size_mb + + prevent a timeout error """ - limit_mb = 100 - limit_str = f"{limit_mb}MB" - # limit_bytes = limit_mb * 1024 * 1024 limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 size = int(await self._get_dir_size(self._get_os_path(path))) + # convert from KB to Bytes for macOS size = size * 1024 if platform.system() == "Darwin" else size if size > limit_bytes: raise web.HTTPError( 400, f""" - Can't copy folders larger than {limit_str}, + Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "{path}" is {await self._human_readable_size(size)} """, ) @@ -1176,13 +1170,9 @@ async def _get_dir_size(self, path: str = ".") -> str: calls the command line program du to get the directory size """ try: - # result = subprocess.run( - # ["du", "-s", "--block-size=1", path], capture_output=True - # ).stdout.split() - if platform.system() == "Darwin": + # retuns the size of the folder in KB result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() - else: result = subprocess.run( ["du", "-s", "--block-size=1", path], capture_output=True diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index c2d8d6d9e4..f46dbfa39b 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -92,10 +92,10 @@ def _make_big_dir(contents_manager, api_path): # the file size of the copied file ends up being smaller than the original # we only want to increase the number of folder if the tests is being run on # windows, - big_dir_size = contents_manager.max_copy_folder_size_mb * 3 - print(f"the size of the big folder for testing is {big_dir_size}") - num_sub_folders = big_dir_size if platform.system() != "Windows" else big_dir_size * 3 - + # big_dir_size = contents_manager.max_copy_folder_size_mb * 3 + # print(f"the size of the big folder for testing is {big_dir_size}") + # num_sub_folders = big_dir_size if platform.system() != "Windows" else big_dir_size * 3 + num_sub_folders = contents_manager.max_copy_folder_size_mb * 10 for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") for j in range(200): From 6092b311630e21a7691061b197550a563c18e790 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 Feb 2023 05:05:09 +0000 Subject: [PATCH 10/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_server/services/contents/filemanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index e1d94664aa..7492d8fca7 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -1147,7 +1147,7 @@ async def check_folder_size(self, path: str) -> None: """ limit the size of folders being copied to be no more than the trait max_copy_folder_size_mb - + prevent a timeout error """ From e5d65a1190d91001fd264ff169ddc427670a637a Mon Sep 17 00:00:00 2001 From: mosesmbugua Date: Sat, 11 Feb 2023 21:34:17 -0800 Subject: [PATCH 11/12] remove old code that is commented out --- .../services/contents/filemanager.py | 8 +++---- tests/services/contents/test_manager.py | 22 ------------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 7492d8fca7..e38c44c1f0 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -668,7 +668,8 @@ def _copy_dir(self, from_path, to_path_original, to_name, to_path): def check_folder_size(self, path): """ - limit the size of folders being copied to prevent a timeout error + limit the size of folders being copied to be no more than the + trait max_copy_folder_size_mb to prevent a timeout error """ limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 size = int(self._get_dir_size(self._get_os_path(path))) @@ -1146,10 +1147,7 @@ async def _copy_dir( async def check_folder_size(self, path: str) -> None: """ limit the size of folders being copied to be no more than the - trait max_copy_folder_size_mb - - prevent a timeout error - + trait max_copy_folder_size_mb to prevent a timeout error """ limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index f46dbfa39b..15ceeecd0e 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -1,5 +1,4 @@ import os -import platform import shutil import sys import time @@ -59,20 +58,6 @@ def _make_big_dir(contents_manager, api_path): os_path = contents_manager._get_os_path(api_path) try: os.makedirs(os_path) - # textFile = open(f"{os_path}/demofile.txt", "a") - # textFile.write( - # """ - # Lorem ipsum dolor sit amet, consectetur adipiscing elit, - # sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. - # Ut enim ad minim veniam, quis nostrud exercitation ullamco - # laboris nisi ut aliquip ex ea commodo consequat. - # Duis aute irure dolor in reprehenderit in voluptate - # velit esse cillum dolore eu fugiat nulla pariatur. - # Excepteur sint occaecat cupidatat non proident, - # sunt in culpa qui officia deserunt mollit anim id est laborum. - # """ - # ) - # textFile.close() with open(f"{os_path}/demofile.txt", "a") as textFile: textFile.write( @@ -88,13 +73,6 @@ def _make_big_dir(contents_manager, api_path): """ ) - # since shutil.copy in Windows can't copy the metadata for files - # the file size of the copied file ends up being smaller than the original - # we only want to increase the number of folder if the tests is being run on - # windows, - # big_dir_size = contents_manager.max_copy_folder_size_mb * 3 - # print(f"the size of the big folder for testing is {big_dir_size}") - # num_sub_folders = big_dir_size if platform.system() != "Windows" else big_dir_size * 3 num_sub_folders = contents_manager.max_copy_folder_size_mb * 10 for i in range(num_sub_folders): os.makedirs(f"{os_path}/subfolder-{i}") From f8593078a55df40b6d264b53c07f5bc9f5bd623b Mon Sep 17 00:00:00 2001 From: foo Date: Wed, 22 Feb 2023 19:08:04 -0800 Subject: [PATCH 12/12] Move the max_copy_folder_size_mb trait from ContentsManager to FileContentsManager --- jupyter_server/services/contents/filemanager.py | 4 +++- jupyter_server/services/contents/manager.py | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index e38c44c1f0..83eefce397 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -19,7 +19,7 @@ from jupyter_core.paths import exists, is_file_hidden, is_hidden from send2trash import send2trash from tornado import web -from traitlets import Bool, TraitError, Unicode, default, validate +from traitlets import Bool, Int, TraitError, Unicode, default, validate from jupyter_server import _tz as tz from jupyter_server.base.handlers import AuthenticatedFileHandler @@ -44,6 +44,8 @@ class FileContentsManager(FileManagerMixin, ContentsManager): root_dir = Unicode(config=True) + max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") + @default("root_dir") def _default_root_dir(self): try: diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 1011b3e09d..b6ef67e1a5 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -20,7 +20,6 @@ Bool, Dict, Instance, - Int, List, TraitError, Type, @@ -133,8 +132,6 @@ def _notary_default(self): """, ) - max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") - untitled_notebook = Unicode( _i18n("Untitled"), config=True,