Skip to content

Commit 473b345

Browse files
authored
feat: Revamp file reading and writing (#4906)
This PR: - changes the implementation of `readBinFile` and `readFile` to only require two system calls (`stat` + `read`) instead of one `read` per 1024 byte chunk. - fixes a bug where `Handle.getLine` would get tripped up by a NUL character in the line and cut the string off. This is caused by the fact that the original implementation uses `strlen` and `lean_mk_string` which is the backer of `mk_string` does so as well. - fixes a bug where `Handle.putStr` and thus by extension `writeFile` would get tripped up by a NUL char in the line and cut the string off. Cause here is the use of `fputs` when a NUL char is possible. Closes: #4891 Closes: #3546 Closes: #3741
1 parent 574066b commit 473b345

File tree

4 files changed

+62
-46
lines changed

4 files changed

+62
-46
lines changed

src/Init/System/IO.lean

+31-17
Original file line numberDiff line numberDiff line change
@@ -470,31 +470,23 @@ def withFile (fn : FilePath) (mode : Mode) (f : Handle → IO α) : IO α :=
470470
def Handle.putStrLn (h : Handle) (s : String) : IO Unit :=
471471
h.putStr (s.push '\n')
472472

473-
partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
473+
partial def Handle.readBinToEndInto (h : Handle) (buf : ByteArray) : IO ByteArray := do
474474
let rec loop (acc : ByteArray) : IO ByteArray := do
475475
let buf ← h.read 1024
476476
if buf.isEmpty then
477477
return acc
478478
else
479479
loop (acc ++ buf)
480-
loop ByteArray.empty
480+
loop buf
481481

482-
partial def Handle.readToEnd (h : Handle) : IO String := do
483-
let rec loop (s : String) := do
484-
let line ← h.getLine
485-
if line.isEmpty then
486-
return s
487-
else
488-
loop (s ++ line)
489-
loop ""
490-
491-
def readBinFile (fname : FilePath) : IO ByteArray := do
492-
let h ← Handle.mk fname Mode.read
493-
h.readBinToEnd
482+
partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
483+
h.readBinToEndInto .empty
494484

495-
def readFile (fname : FilePath) : IO String := do
496-
let h ← Handle.mk fname Mode.read
497-
h.readToEnd
485+
def Handle.readToEnd (h : Handle) : IO String := do
486+
let data ← h.readBinToEnd
487+
match String.fromUTF8? data with
488+
| some s => return s
489+
| none => throw <| .userError s!"Tried to read from handle containing non UTF-8 data."
498490

499491
partial def lines (fname : FilePath) : IO (Array String) := do
500492
let h ← Handle.mk fname Mode.read
@@ -600,6 +592,28 @@ end System.FilePath
600592

601593
namespace IO
602594

595+
namespace FS
596+
597+
def readBinFile (fname : FilePath) : IO ByteArray := do
598+
-- Requires metadata so defined after metadata
599+
let mdata ← fname.metadata
600+
let size := mdata.byteSize.toUSize
601+
let handle ← IO.FS.Handle.mk fname .read
602+
let buf ←
603+
if size > 0 then
604+
handle.read mdata.byteSize.toUSize
605+
else
606+
pure <| ByteArray.mkEmpty 0
607+
handle.readBinToEndInto buf
608+
609+
def readFile (fname : FilePath) : IO String := do
610+
let data ← readBinFile fname
611+
match String.fromUTF8? data with
612+
| some s => return s
613+
| none => throw <| .userError s!"Tried to read file '{fname}' containing non UTF-8 data."
614+
615+
end FS
616+
603617
def withStdin [Monad m] [MonadFinally m] [MonadLiftT BaseIO m] (h : FS.Stream) (x : m α) : m α := do
604618
let prev ← setStdin h
605619
try x finally discard <| setStdin prev

src/runtime/io.cpp

+16-29
Original file line numberDiff line numberDiff line change
@@ -485,43 +485,30 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_write(b_obj_arg h, b_obj_arg
485485
}
486486
}
487487

488-
/*
489-
Handle.getLine : (@& Handle) → IO Unit
490-
The line returned by `lean_io_prim_handle_get_line`
491-
is truncated at the first '\0' character and the
492-
rest of the line is discarded. */
488+
/* Handle.getLine : (@& Handle) → IO Unit */
493489
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_get_line(b_obj_arg h, obj_arg /* w */) {
494490
FILE * fp = io_get_handle(h);
495-
const int buf_sz = 64;
496-
char buf_str[buf_sz]; // NOLINT
497-
std::string result;
498-
bool first = true;
499-
while (true) {
500-
char * out = std::fgets(buf_str, buf_sz, fp);
501-
if (out != nullptr) {
502-
if (strlen(buf_str) < buf_sz-1 || buf_str[buf_sz-2] == '\n') {
503-
if (first) {
504-
return io_result_mk_ok(mk_string(out));
505-
} else {
506-
result.append(out);
507-
return io_result_mk_ok(mk_string(result));
508-
}
509-
}
510-
result.append(out);
511-
} else if (std::feof(fp)) {
512-
clearerr(fp);
513-
return io_result_mk_ok(mk_string(result));
514-
} else {
515-
return io_result_mk_error(decode_io_error(errno, nullptr));
516-
}
517-
first = false;
491+
char* buf = NULL;
492+
size_t n = 0;
493+
ssize_t read = getline(&buf, &n, fp);
494+
if (read != -1) {
495+
obj_res ret = io_result_mk_ok(mk_string_from_bytes(buf, read));
496+
free(buf);
497+
return ret;
498+
} else if (std::feof(fp)) {
499+
clearerr(fp);
500+
return io_result_mk_ok(mk_string(""));
501+
} else {
502+
return io_result_mk_error(decode_io_error(errno, nullptr));
518503
}
519504
}
520505

521506
/* Handle.putStr : (@& Handle) → (@& String) → IO Unit */
522507
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_put_str(b_obj_arg h, b_obj_arg s, obj_arg /* w */) {
523508
FILE * fp = io_get_handle(h);
524-
if (std::fputs(lean_string_cstr(s), fp) != EOF) {
509+
usize n = lean_string_size(s) - 1; // - 1 to ignore the terminal NULL byte.
510+
usize m = std::fwrite(lean_string_cstr(s), 1, n, fp);
511+
if (m == n) {
525512
return io_result_mk_ok(box(0));
526513
} else {
527514
return io_result_mk_error(decode_io_error(errno, nullptr));

src/runtime/object.h

+1
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ inline size_t string_capacity(object * o) { return lean_string_capacity(o); }
238238
inline uint32 char_default_value() { return lean_char_default_value(); }
239239
inline obj_res alloc_string(size_t size, size_t capacity, size_t len) { return lean_alloc_string(size, capacity, len); }
240240
inline obj_res mk_string(char const * s) { return lean_mk_string(s); }
241+
inline obj_res mk_string_from_bytes(char const * s, size_t sz) { return lean_mk_string_from_bytes(s, sz); }
241242
LEAN_EXPORT obj_res mk_ascii_string_unchecked(std::string const & s);
242243
LEAN_EXPORT obj_res mk_string(std::string const & s);
243244
LEAN_EXPORT std::string string_to_std(b_obj_arg o);

tests/lean/run/3546.lean

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
def test : IO Unit := do
2+
let tmpFile := "3546.tmp"
3+
let firstLine := "foo\u0000bar\n"
4+
let content := firstLine ++ "hello world\nbye"
5+
IO.FS.writeFile tmpFile content
6+
let handle ← IO.FS.Handle.mk tmpFile .read
7+
let firstReadLine ← handle.getLine
8+
let cond := firstLine == firstReadLine && firstReadLine.length == 8 -- paranoid
9+
IO.println cond
10+
IO.FS.removeFile tmpFile
11+
12+
/-- info: true -/
13+
#guard_msgs in
14+
#eval test

0 commit comments

Comments
 (0)