Skip to content

Commit 52254d4

Browse files
author
mmbugua
committed
Enable users to copy both files and directories
1 parent b6c1edb commit 52254d4

File tree

3 files changed

+347
-13
lines changed

3 files changed

+347
-13
lines changed

jupyter_server/services/contents/filemanager.py

+232-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
# Copyright (c) Jupyter Development Team.
33
# Distributed under the terms of the Modified BSD License.
44
import errno
5+
import math
56
import mimetypes
67
import os
78
import shutil
89
import stat
10+
import subprocess
911
import sys
1012
import warnings
1113
from datetime import datetime
@@ -25,7 +27,7 @@
2527

2628
from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
2729
from .fileio import AsyncFileManagerMixin, FileManagerMixin
28-
from .manager import AsyncContentsManager, ContentsManager
30+
from .manager import AsyncContentsManager, ContentsManager, copy_pat
2931

3032
try:
3133
from os.path import samefile
@@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None):
602604
parent_dir = ""
603605
return parent_dir
604606

607+
def copy(self, from_path: str, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Anything that is not a directory is delegated to the parent class.
    Directories are size-checked with ``check_folder_size`` and then copied
    by ``_copy_dir``.

    from_path must be a full path to a file or directory.
    If to_path is not specified, the copy is placed in the parent directory
    of from_path.
    When the destination directory exists, the name of the copy is chosen
    by ``increment_filename(..., insert="-Copy")`` so it does not clash with
    an existing entry.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir = ""
        from_name = path

    # Only the type is needed here, so skip loading the content.
    model = self.get(path, content=False)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    # Resolve the destination directory *before* handing it to _copy_dir.
    # Stringifying a missing to_path would yield the literal "None" and the
    # copy would land in a folder of that name.
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(dest_dir):
        to_name = super().increment_filename(to_name, dest_dir, insert="-Copy")
    to_path = f"{dest_dir}/{to_name}"

    return self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=to_path,
    )
650+
651+
def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str):
    """
    Copy a directory tree and return the model of the copied directory.

    from_path: API path of the directory to copy.
    to_path_original: API path of the directory that will contain the copy.
    to_name: name of the copy inside that directory.
    to_path: API path of the copy itself; used to fetch the returned model
        and in the error message.

    Raises a 400 ``web.HTTPError`` when the copy fails with an ``OSError``.
    """
    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        # Join with the OS separator instead of hand-formatting a "/" into
        # an OS path.
        os_to_path = os.path.join(self._get_os_path(to_path_original.strip("/")), to_name)
        shutil.copytree(os_from_path, os_to_path)
        model = self.get(to_path, content=False)
    except OSError as err:
        self.log.error("OSError in _copy_dir: %s", err)
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into Folder '{to_path}'",
        ) from err

    return model
669+
670+
def check_folder_size(self, path: str, limit_mb: int = 100):
    """
    Limit the size of folders being copied to prevent a timeout error.

    path: API path of the folder about to be copied.
    limit_mb: largest folder size, in units of 1024 * 1024 bytes, that may
        be copied. Defaults to 100.

    Returns None when the folder is within the limit and raises a 400
    ``web.HTTPError`` when it is larger.
    """
    limit_bytes = limit_mb * 1024 * 1024
    size = int(self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        # Built from adjacent literals so the message does not carry the
        # newlines and indentation of a triple-quoted block.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {limit_mb}MB, "
            f'"{path}" is {self._human_readable_size(size)}',
        )
686+
687+
def _get_dir_size(self, path: str = "."):
    """
    Return the disk usage of the directory at ``path`` in bytes, as a
    decimal string.

    The command line program ``du`` is tried first. ``--block-size`` is a
    GNU option, so when ``du`` is missing or prints nothing usable (for
    example BSD/macOS ``du``), the size is computed by walking the tree in
    Python instead.

    Raises a 400 ``web.HTTPError`` when the size cannot be determined.
    """

    def _walk_usage(root: str) -> int:
        """Sum the disk usage of ``root`` and everything below it."""

        def _usage(st: os.stat_result) -> int:
            # st_blocks counts 512-byte units; fall back to the apparent
            # size on platforms that do not expose it.
            blocks = getattr(st, "st_blocks", None)
            return st.st_size if blocks is None else blocks * 512

        total = _usage(os.lstat(root))
        for dirpath, dirnames, filenames in os.walk(root):
            for name in (*dirnames, *filenames):
                try:
                    total += _usage(os.lstat(os.path.join(dirpath, name)))
                except OSError:
                    # Entry vanished or is unreadable: leave it out of the
                    # total rather than failing the whole measurement.
                    continue
        return total

    try:
        result = subprocess.run(
            ["du", "-s", "--block-size=1", path], capture_output=True
        ).stdout.split()
        self.log.info(f"current status of du command {result}")
        # Round-trip through int so non-numeric output is rejected here.
        size = str(int(result[0].decode("utf-8")))
    except (OSError, subprocess.SubprocessError, IndexError, ValueError) as du_err:
        self.log.debug("du could not size %s (%s); walking the tree instead", path, du_err)
        try:
            size = str(_walk_usage(path))
        except (OSError, ValueError) as err:
            self.log.error(f"Error during directory copy: {err}")
            raise web.HTTPError(
                400,
                "Unexpected error during copy operation, "
                f"not able to get the size of the {path} directory",
            ) from err
    return size
707+
708+
def _human_readable_size(self, size: int):
    """
    Return a folder size in a human readable format.

    size: size in bytes.

    The value is scaled by powers of 1024 and formatted with up to four
    significant digits, e.g. ``1536`` -> ``"1.5 KB"``. Sizes beyond the
    largest known unit are expressed in that unit.
    """
    if size == 0:
        return "0 Bytes"

    units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
    # bit_length gives floor(log2) exactly without floating point; the
    # clamp keeps sizes of 2**60 and above from indexing past the last unit.
    magnitude = abs(int(size))
    order = min(max(magnitude.bit_length() - 1, 0) // 10, len(units) - 1)

    return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
719+
605720

606721
class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
607722
"""An async file contents manager."""
@@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None):
9591074
else:
9601075
parent_dir = ""
9611076
return parent_dir
1077+
1078+
async def copy(self, from_path: str, to_path=None) -> dict:
    """
    Copy an existing file or directory and return its new model.

    Anything that is not a directory is delegated to
    ``AsyncContentsManager.copy``. Directories are size-checked with
    ``check_folder_size`` and then copied by ``_copy_dir``.

    from_path must be a full path to a file or directory.
    If to_path is not specified, the copy is placed in the parent directory
    of from_path.
    When the destination directory exists, the name of the copy is chosen
    by ``increment_filename(..., insert="-Copy")`` so it does not clash with
    an existing entry.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir = ""
        from_name = path

    # Only the type is needed here, so skip loading the content.
    model = await self.get(path, content=False)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    # Resolve the destination directory *before* handing it to _copy_dir.
    # Stringifying a missing to_path would yield the literal "None" and the
    # copy would land in a folder of that name.
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(dest_dir):
        to_name = await super().increment_filename(to_name, dest_dir, insert="-Copy")
    to_path = f"{dest_dir}/{to_name}"

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=to_path,
    )
1121+
1122+
async def _copy_dir(
    self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict:
    """
    Copy a directory tree and return the model of the copied directory.

    from_path: API path of the directory to copy.
    to_path_original: API path of the directory that will contain the copy.
    to_name: name of the copy inside that directory.
    to_path: API path of the copy itself; used to fetch the returned model
        and in the error message.

    Raises a 400 ``web.HTTPError`` when the copy fails with an ``OSError``.
    """
    import asyncio

    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        # Join with the OS separator instead of hand-formatting a "/" into
        # an OS path.
        os_to_path = os.path.join(self._get_os_path(to_path_original.strip("/")), to_name)
        # copytree is blocking; run it in the default executor so the event
        # loop keeps serving other requests while the tree is copied.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copytree, os_from_path, os_to_path)
        model = await self.get(to_path, content=False)
    except OSError as err:
        self.log.error("OSError in _copy_dir: %s", err)
        # Any OSError ends up here, not only a read-only destination, so the
        # message does not claim a specific cause.
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into Folder '{to_path}'",
        ) from err

    return model
1142+
1143+
async def check_folder_size(self, path: str, limit_mb: int = 100) -> None:
    """
    Limit the size of folders being copied to prevent a timeout error.

    path: API path of the folder about to be copied.
    limit_mb: largest folder size, in units of 1024 * 1024 bytes, that may
        be copied. Defaults to 100.

    Returns None when the folder is within the limit and raises a 400
    ``web.HTTPError`` when it is larger.
    """
    limit_bytes = limit_mb * 1024 * 1024
    size = int(await self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        readable = await self._human_readable_size(size)
        # Built from adjacent literals so the message does not carry the
        # newlines and indentation of a triple-quoted block.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {limit_mb}MB, "
            f'"{path}" is {readable}',
        )
1160+
1161+
async def _get_dir_size(self, path: str = ".") -> str:
    """
    Return the disk usage of the directory at ``path`` in bytes, as a
    decimal string.

    The command line program ``du`` is tried first. ``--block-size`` is a
    GNU option, so when ``du`` is missing or prints nothing usable (for
    example BSD/macOS ``du``), the size is computed by walking the tree in
    Python instead. The measurement runs in the default executor so the
    event loop is not blocked while the tree is scanned.

    Raises a 400 ``web.HTTPError`` when the size cannot be determined.
    """
    import asyncio

    def _walk_usage(root: str) -> int:
        """Sum the disk usage of ``root`` and everything below it."""

        def _usage(st: os.stat_result) -> int:
            # st_blocks counts 512-byte units; fall back to the apparent
            # size on platforms that do not expose it.
            blocks = getattr(st, "st_blocks", None)
            return st.st_size if blocks is None else blocks * 512

        total = _usage(os.lstat(root))
        for dirpath, dirnames, filenames in os.walk(root):
            for name in (*dirnames, *filenames):
                try:
                    total += _usage(os.lstat(os.path.join(dirpath, name)))
                except OSError:
                    # Entry vanished or is unreadable: leave it out of the
                    # total rather than failing the whole measurement.
                    continue
        return total

    def _measure() -> str:
        """Blocking measurement: ``du`` first, Python walk as fallback."""
        try:
            result = subprocess.run(
                ["du", "-s", "--block-size=1", path], capture_output=True
            ).stdout.split()
            self.log.info(f"current status of du command {result}")
            # Round-trip through int so non-numeric output is rejected here.
            return str(int(result[0].decode("utf-8")))
        except (OSError, subprocess.SubprocessError, IndexError, ValueError) as du_err:
            self.log.debug("du could not size %s (%s); walking the tree instead", path, du_err)
            return str(_walk_usage(path))

    try:
        size = await asyncio.get_running_loop().run_in_executor(None, _measure)
    except (OSError, ValueError) as err:
        self.log.error(f"Error during directory copy: {err}")
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err
    return size
1181+
1182+
async def _human_readable_size(self, size: int) -> str:
    """
    Return a folder size in a human readable format.

    size: size in bytes.

    The value is scaled by powers of 1024 and formatted with up to four
    significant digits, e.g. ``1536`` -> ``"1.5 KB"``. Sizes beyond the
    largest known unit are expressed in that unit.
    """
    if size == 0:
        return "0 Bytes"

    units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
    # bit_length gives floor(log2) exactly without floating point; the
    # clamp keeps sizes of 2**60 and above from indexing past the last unit.
    magnitude = abs(int(size))
    order = min(max(magnitude.bit_length() - 1, 0) // 10, len(units) - 1)

    return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])

tests/services/contents/test_api.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
494494
_check_created(r, str(contents_dir), path, copy3, type="notebook")
495495

496496

497+
async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
    """POSTing ``copy_from`` for a directory twice yields ``foo`` and then ``foo-Copy1``."""
    source_dir = "foo"
    target_dir = "parent"
    request_body = json.dumps({"copy_from": source_dir})

    # First request: creates a nested copy of the original folder.
    first = await jp_fetch("api", "contents", target_dir, method="POST", body=request_body)
    _check_created(first, str(contents_dir), target_dir, source_dir, type="directory")

    # Second request: a folder with that name now exists in the target, so
    # the new copy is expected under the incremented name.
    second = await jp_fetch("api", "contents", target_dir, method="POST", body=request_body)
    _check_created(
        second, str(contents_dir), target_dir, f"{source_dir}-Copy1", type="directory"
    )
516+
517+
497518
async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
498519
path1 = "foo"
499520
path2 = "å b"
@@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
577598
assert expected_http_error(e, 400)
578599

579600

580-
async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
    """POSTing ``copy_from`` with the directory ``å b`` is expected to fail with HTTP 400."""
    request_body = json.dumps({"copy_from": "å b"})
    with pytest.raises(tornado.httpclient.HTTPClientError) as excinfo:
        await jp_fetch("api", "contents", "foo", method="POST", body=request_body)
    assert expected_http_error(excinfo, 400)
590-
591-
592601
@pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
593602
async def test_copy_400_hidden(
594603
jp_fetch,

0 commit comments

Comments
 (0)