diff --git a/base/c.jl b/base/c.jl index 13e1808fc5883..ee7eb1ab15618 100644 --- a/base/c.jl +++ b/base/c.jl @@ -106,13 +106,15 @@ if is_windows() function cwstring(s::AbstractString) bytes = String(s).data 0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) - return push!(utf8to16(bytes), 0) + return push!(transcode(UInt16, bytes), 0) end end -# conversions between UTF-8 and UTF-16 for Windows APIs +# transcoding between data in UTF-8 and UTF-16 for Windows APIs -function utf8to16(src::Vector{UInt8}) +transcode{T<:Union{UInt8,UInt16}}(::Type{T}, src::Vector{T}) = src + +function transcode(::Type{UInt16}, src::Vector{UInt8}) dst = UInt16[] i, n = 1, length(src) n > 0 || return dst @@ -162,7 +164,7 @@ function utf8to16(src::Vector{UInt8}) return dst end -function utf16to8(src::Vector{UInt16}) +function transcode(::Type{UInt8}, src::Vector{UInt16}) dst = UInt8[] i, n = 1, length(src) n > 0 || return dst diff --git a/base/env.jl b/base/env.jl index f1e0076bc2b66..cb21ecbe0dcf4 100644 --- a/base/env.jl +++ b/base/env.jl @@ -19,7 +19,7 @@ function access_env(onError::Function, str::AbstractString) error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage())) end pop!(val) # NUL - return String(utf16to8(val)) + return String(transcode(UInt8, val)) end function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true) @@ -97,7 +97,7 @@ function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}}) len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos) buf = Array{UInt16}(len) unsafe_copy!(pointer(buf), pos, len) - env = String(utf16to8(buf)) + env = String(transcode(UInt8, buf)) m = match(r"^(=?[^=]+)=(.*)$"s, env) if m === nothing error("malformed environment entry: $env") diff --git a/base/file.jl b/base/file.jl index b7d2ffbd1a8a7..cf11560eb745b 100644 --- a/base/file.jl +++ b/base/file.jl @@ -203,7 +203,7 @@ function tempdir() error("GetTempPath failed: $(Libc.FormatMessage())") end resize!(temppath,lentemppath) - return String(utf16to8(temppath)) + return String(transcode(UInt8, temppath)) end tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique) const temp_prefix = cwstring("jl_") @@ -216,7 +216,7 @@ function tempname(temppath::AbstractString,uunique::UInt32) error("GetTempFileName failed: $(Libc.FormatMessage())") end resize!(tname,lentname) - return String(utf16to8(tname)) + return String(transcode(UInt8, tname)) end function mktemp(parent=tempdir()) filename = tempname(parent, UInt32(0)) diff --git a/base/filesystem.jl b/base/filesystem.jl index 17f2e35f5796f..c43f8bf92836b 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -42,8 +42,8 @@ export File, import Base: UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen, nb_available, position, read, read!, readavailable, seek, seekend, show, - skip, stat, unsafe_read, unsafe_write, utf16to8, utf8to16, uv_error, - uvhandle, uvtype, write + skip, stat, unsafe_read, unsafe_write, transcode, uv_error, uvhandle, + uvtype, write if is_windows() import Base: cwstring diff --git a/base/interactiveutil.jl b/base/interactiveutil.jl index d78fffad41608..22821a8c4fd6f 100644 --- a/base/interactiveutil.jl +++ b/base/interactiveutil.jl @@ -150,7 +150,7 @@ elseif is_windows() len = 0 while unsafe_load(plock, len+1) != 0; len += 1; end # get Vector{UInt16}, transcode data to UTF-8, make a String of it - s = String(utf16to8(unsafe_wrap(Array, plock, len))) + s = String(transcode(UInt8, unsafe_wrap(Array, plock, len))) systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock)) return s end diff --git a/base/libc.jl b/base/libc.jl index f7c662f3c6dac..6943d457ebde7 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -2,14 +2,14 @@ module Libc +import Base: transcode + export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc, - errno, strerror, flush_cstdio, systemsleep, time + errno, strerror, flush_cstdio, systemsleep, time, transcode if is_windows() export GetLastError, FormatMessage end -import Base: utf16to8 - include(string(length(Core.ARGS)>=2?Core.ARGS[2]:"","errno_h.jl")) # include($BUILDROOT/base/errno_h.jl) ## RawFD ## @@ -277,7 +277,7 @@ if is_windows() buf = Array{UInt16}(len) unsafe_copy!(pointer(buf), p, len) ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p) - return String(utf16to8(buf)) + return String(transcode(UInt8, buf)) end end diff --git a/base/path.jl b/base/path.jl index c50f3e386981f..85aec28aadb07 100644 --- a/base/path.jl +++ b/base/path.jl @@ -136,7 +136,7 @@ function realpath(path::AbstractString) systemerror(:realpath, n == 0) x = n < length(buf) # is the buffer big enough? resize!(buf, n) # shrink if x, grow if !x - x && return String(utf16to8(buf)) + x && return String(transcode(UInt8, buf)) end end @@ -150,7 +150,7 @@ function longpath(path::AbstractString) systemerror(:longpath, n == 0) x = n < length(buf) # is the buffer big enough? resize!(buf, n) # shrink if x, grow if !x - x && return String(utf16to8(buf)) + x && return String(transcode(UInt8, buf)) end end diff --git a/test/misc.jl b/test/misc.jl index 0fd15cdfb7969..e3ae6b72e8ce6 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -209,11 +209,11 @@ whos(IOBuffer(), Tmp14173) # warm up @test @allocated(whos(IOBuffer(), Tmp14173)) < 10000 ## test conversion from UTF-8 to UTF-16 (for Windows APIs) -import Base: utf8to16, utf16to8 +import Base.Libc: transcode # empty arrays -@test utf8to16(UInt8[]) == UInt16[] -@test utf16to8(UInt16[]) == UInt8[] +@test transcode(UInt16, UInt8[]) == UInt16[] +@test transcode(UInt8, UInt16[]) == UInt8[] # UTF-8-like sequences V8 = [ @@ -304,15 +304,15 @@ I8 = [(s,map(UInt16,s)) for s in X8] for (X,Y,Z) in ((V8,V8,V8), (I8,V8,I8), (V8,I8,V8), (V8,V8,I8), (I8,V8,V8)) for (a8, a16) in X - @test utf8to16(a8) == a16 + @test transcode(UInt16, a8) == a16 for (b8, b16) in Y ab8 = [a8; b8] ab16 = [a16; b16] - @test utf8to16(ab8) == ab16 + @test transcode(UInt16, ab8) == ab16 for (c8, c16) in Z abc8 = [ab8; c8] abc16 = [ab16; c16] - @test utf8to16(abc8) == abc16 + @test transcode(UInt16, abc8) == abc16 end end end @@ -359,18 +359,18 @@ I16 = [ for (X,Y,Z) in ((V16,V16,V16), (I16,V16,I16), (V16,I16,V16), (V16,V16,I16), (I16,V16,V16)) for (a16, a8) in X - @test utf16to8(a16) == a8 - @test utf8to16(a8) == a16 + @test transcode(UInt8, a16) == a8 + @test transcode(UInt16, a8) == a16 for (b16, b8) in Y ab16 = [a16; b16] ab8 = [a8; b8] - @test utf16to8(ab16) == ab8 - @test utf8to16(ab8) == ab16 + @test transcode(UInt8, ab16) == ab8 + @test transcode(UInt16, ab8) == ab16 for (c16, c8) in Z abc16 = [ab16; c16] abc8 = [ab8; c8] - @test utf16to8(abc16) == abc8 - @test utf8to16(abc8) == abc16 + @test transcode(UInt8, abc16) == abc8 + @test transcode(UInt16, abc8) == abc16 end end end