@@ -273,7 +273,7 @@ class S3FileSystem(AsyncFileSystem):
     connect_timeout = 5
     retries = 5
     read_timeout = 15
-    default_block_size = 5 * 2**20
+    default_block_size = 50 * 2**20
     protocol = ("s3", "s3a")
     _extra_tokenize_attributes = ("default_block_size",)
@@ -299,7 +299,7 @@ def __init__(
         cache_regions=False,
         asynchronous=False,
         loop=None,
-        max_concurrency=1,
+        max_concurrency=10,
         fixed_upload_size: bool = False,
         **kwargs,
     ):
@@ -1133,8 +1133,11 @@ async def _call_and_read():
 
         return await _error_wrapper(_call_and_read, retries=self.retries)
 
-    async def _pipe_file(self, path, data, chunksize=50 * 2**20, **kwargs):
+    async def _pipe_file(
+        self, path, data, chunksize=50 * 2**20, max_concurrency=None, **kwargs
+    ):
         bucket, key, _ = self.split_path(path)
+        concurrency = max_concurrency or self.max_concurrency
         size = len(data)
         # 5 GB is the limit for an S3 PUT
         if size < min(5 * 2**30, 2 * chunksize):
@@ -1146,23 +1149,27 @@ async def _pipe_file(self, path, data, chunksize=50 * 2**20, **kwargs):
         mpu = await self._call_s3(
             "create_multipart_upload", Bucket=bucket, Key=key, **kwargs
         )
-
-        # TODO: cancel MPU if the following fails
-        out = [
-            await self._call_s3(
-                "upload_part",
-                Bucket=bucket,
-                PartNumber=i + 1,
-                UploadId=mpu["UploadId"],
-                Body=data[off : off + chunksize],
-                Key=key,
+        ranges = list(range(0, len(data), chunksize))
+        inds = list(range(0, len(ranges), concurrency)) + [len(ranges)]
+        parts = []
+        for start, stop in zip(inds[:-1], inds[1:]):
+            out = await asyncio.gather(
+                *[
+                    self._call_s3(
+                        "upload_part",
+                        Bucket=bucket,
+                        PartNumber=i + 1,
+                        UploadId=mpu["UploadId"],
+                        Body=data[ranges[i] : ranges[i] + chunksize],
+                        Key=key,
+                    )
+                    for i in range(start, stop)
+                ]
+            )
+            parts.extend(
+                {"PartNumber": i + 1, "ETag": o["ETag"]}
+                for i, o in zip(range(start, stop), out)
             )
-            for i, off in enumerate(range(0, len(data), chunksize))
-        ]
-
-        parts = [
-            {"PartNumber": i + 1, "ETag": o["ETag"]} for i, o in enumerate(out)
-        ]
         await self._call_s3(
             "complete_multipart_upload",
             Bucket=bucket,
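
For reference, a minimal standalone sketch of the batching scheme introduced above: part indices are grouped into waves of `concurrency`, and each wave is awaited with asyncio.gather before the next starts, so at most `concurrency` uploads are in flight at once. `upload_part` here is a hypothetical stand-in for the real `_call_s3("upload_part", ...)` coroutine.

import asyncio

async def upload_part(part_number):
    # Hypothetical stand-in for S3FileSystem._call_s3("upload_part", ...);
    # returns a fake ETag after simulating network latency.
    await asyncio.sleep(0.01)
    return f"etag-{part_number}"

async def upload_all(n_parts, concurrency):
    # Same windowing as the patch: inds marks wave boundaries, e.g.
    # n_parts=7, concurrency=3 -> inds=[0, 3, 6, 7] -> waves of 3, 3, 1.
    inds = list(range(0, n_parts, concurrency)) + [n_parts]
    parts = []
    for start, stop in zip(inds[:-1], inds[1:]):
        out = await asyncio.gather(
            *[upload_part(i + 1) for i in range(start, stop)]
        )
        parts.extend(
            {"PartNumber": i + 1, "ETag": etag}
            for i, etag in zip(range(start, stop), out)
        )
    return parts

print(asyncio.run(upload_all(7, concurrency=3)))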
@@ -2145,7 +2152,7 @@ def __init__(
         s3,
         path,
         mode="rb",
-        block_size=5 * 2**20,
+        block_size=50 * 2**20,
         acl=False,
         version_id=None,
         fill_cache=True,
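
The larger 50 MiB default can still be overridden per file, since block_size is an existing fsspec open() argument. A sketch, assuming a configured filesystem `fs` and a hypothetical bucket/key:

# Keep the previous 5 MiB buffering for a single file
with fs.open("mybucket/mykey", "rb", block_size=5 * 2**20) as f:
    header = f.read(1024)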
@@ -2365,6 +2372,7 @@ def n_bytes_left() -> int:
             return len(self.buffer.getbuffer()) - self.buffer.tell()
 
         min_chunk = 1 if final else self.blocksize
+        # TODO: concurrent here
         if self.fs.fixed_upload_size:
             # all chunks have fixed size, exception: last one can be smaller
             while n_bytes_left() >= min_chunk:
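A brief usage sketch of the new defaults (assuming credentials are available in the environment; the bucket and key are hypothetical):

import s3fs

# max_concurrency now defaults to 10 and can be tuned per filesystem instance
fs = s3fs.S3FileSystem(max_concurrency=10)

# pipe_file routes through _pipe_file: a payload larger than 2 * chunksize
# is split into chunksize-sized parts and uploaded in concurrent batches
fs.pipe_file("mybucket/mykey", b"x" * (120 * 2**20))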