diff --git a/base/REPL.jl b/base/REPL.jl
index d5ac3865de569..8b156ec8c820e 100644
--- a/base/REPL.jl
+++ b/base/REPL.jl
@@ -323,11 +323,11 @@ An editor may have converted tabs to spaces at line """
function hist_getline(file)
while !eof(file)
- line = utf8(readline(file))
+ line = readline(file)
isempty(line) && return line
line[1] in "\r\n" || return line
end
- return utf8("")
+ return ""
end
function hist_from_file(hp, file)
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 6e5a882f1c6fa..a8f475807a7a1 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -1160,9 +1160,17 @@ end
@deprecate_binding UTF8String String
@deprecate_binding ByteString String
+@deprecate utf8(p::Ptr{UInt8}, len::Integer) String(p, len)
+@deprecate utf8(p::Ptr{UInt8}) String(p)
+@deprecate utf8(v::Vector{UInt8}) String(v)
+@deprecate utf8(s::AbstractString) String(s)
+@deprecate utf8(x) convert(String, x)
+
@deprecate ascii(p::Ptr{UInt8}, len::Integer) ascii(String(p, len))
@deprecate ascii(p::Ptr{UInt8}) ascii(String(p))
-@deprecate ascii(x) ascii(string(x))
+@deprecate ascii(v::Vector{UInt8}) ascii(String(v))
+@deprecate ascii(s::AbstractString) ascii(String(s))
+@deprecate ascii(x) ascii(convert(String, x))
@deprecate bytestring(s::Cstring) String(s)
@deprecate bytestring(v::Vector{UInt8}) String(v)
diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl
index 412daf429e2f2..0d0bd501447dc 100644
--- a/base/docs/helpdb/Base.jl
+++ b/base/docs/helpdb/Base.jl
@@ -1854,7 +1854,7 @@ Dict{String,Float64} with 2 entries:
"bar" => 42.0
"foo" => 0.0
-julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
+julia> b = Dict("baz" => 17, "bar" => 4711)
Dict{String,Int64} with 2 entries:
"bar" => 4711
"baz" => 17
@@ -2962,29 +2962,6 @@ Extract a named field from a `value` of composite type. The syntax `a.b` calls
"""
getfield
-"""
- utf8(::Array{UInt8,1})
-
-Create a UTF-8 string from a byte array.
-"""
-utf8(::Vector{UInt8})
-
-"""
- utf8(::Ptr{UInt8}, [length])
-
-Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy
-is made; the ptr can be safely freed. If `length` is specified, the string does not have to
-be 0-terminated.
-"""
-utf8(::Ptr{UInt8}, length::Int = 1)
-
-"""
- utf8(s)
-
-Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
-"""
-utf8(s)
-
"""
hvcat(rows::Tuple{Vararg{Int}}, values...)
diff --git a/base/env.jl b/base/env.jl
index bc0482c2e0702..7b3e7f1da3ef6 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -34,7 +34,7 @@ function access_env(onError::Function, str::AbstractString)
var = cwstring(str)
len = _getenvlen(var)
if len == 0
- return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
+ return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str)
end
val = zeros(UInt16,len)
ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len)
diff --git a/base/exports.jl b/base/exports.jl
index d0810e0e9d814..bf47c04a31ca1 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -887,7 +887,6 @@ export
ucfirst,
unescape_string,
uppercase,
- utf8,
utf16,
utf32,
warn,
diff --git a/base/libc.jl b/base/libc.jl
index 6585e4753f1f3..648b1a4024c40 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -259,7 +259,7 @@ function FormatMessage end
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK,
C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
p = lpMsgBuf[1]
- len == 0 && return utf8("")
+ len == 0 && return ""
buf = Array(UInt16, len)
unsafe_copy!(pointer(buf), p, len)
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
diff --git a/base/libgit2.jl b/base/libgit2.jl
index cada5eae0e7f5..4148e26465153 100644
--- a/base/libgit2.jl
+++ b/base/libgit2.jl
@@ -465,7 +465,7 @@ function snapshot(repo::GitRepo)
work = try
with(GitIndex, repo) do idx
if length(readdir(path(repo))) > 1
- add!(idx, utf8("."))
+ add!(idx, ".")
write!(idx)
end
write_tree!(idx)
diff --git a/base/precompile.jl b/base/precompile.jl
index a9f6935a6abcd..b70c331c18f0e 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -400,7 +400,6 @@ precompile(Base.UInt, (UInt,))
precompile(Base.unsafe_copy!, (Array{Dict{Any, Any}, 1}, Int, Array{Dict{Any, Any}, 1}, Int, Int))
precompile(Base.unsafe_copy!, (Ptr{Dict{Any, Any}}, Ptr{Dict{Any, Any}}, Int))
precompile(Base.unshift!, (Array{Any,1}, Task))
-precompile(Base.utf8, (String,))
precompile(Base.uv_error, (String, Bool))
precompile(Base.uvfinalize, (Base.TTY,))
precompile(Base.vcat, (Base.LineEdit.Prompt,))
diff --git a/base/regex.jl b/base/regex.jl
index fd2eb708a9b64..42b606bbf359f 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -209,8 +209,8 @@ function matchall(re::Regex, str::String, overlap::Bool=false)
matches
end
-matchall(re::Regex, str::Union{String,SubString}, overlap::Bool=false) =
- matchall(re, utf8(str), overlap)
+matchall(re::Regex, str::SubString, overlap::Bool=false) =
+ matchall(re, String(str), overlap)
function search(str::Union{String,SubString}, re::Regex, idx::Integer)
if idx > nextind(str,endof(str))
diff --git a/base/show.jl b/base/show.jl
index 938bd78b7c169..617229b261a60 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1189,7 +1189,7 @@ Accept keyword args `c` for alternate single character marker.
"""
function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
N = length(s)
- return join(setindex!([utf8(" ") for i=1:N],string(c),ceil(Int,N/2)))
+ return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
end
"""
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 2d7869f61df53..762a9f62d62d7 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -32,8 +32,8 @@ String(s::Vector{UInt8}) =
String(p::Ptr{UInt8}, [length::Integer])
Create a string from the address of a C (0-terminated) string encoded as UTF-8.
-A copy is made so the ptr can be safely freed. If `length` is specified, the string
-does not have to be 0-terminated.
+A copy is made so the pointer can be safely freed. If `length` is specified, the
+string does not have to be 0-terminated.
"""
function String(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl
index 0e7da162a405c..28d0a0f9f609a 100644
--- a/base/unicode/utf8.jl
+++ b/base/unicode/utf8.jl
@@ -230,7 +230,6 @@ write(io::IO, s::String) = write(io, s.data)
## transcoding to UTF-8 ##
-utf8(x) = convert(String, x)
convert(::Type{String}, s::String) = s
function convert(::Type{String}, dat::Vector{UInt8})
@@ -350,10 +349,3 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
end
String(buf)
end
-
-utf8(p::Ptr{UInt8}) =
- utf8(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
-function utf8(p::Ptr{UInt8}, len::Integer)
- p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
- String(ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len))
-end
diff --git a/contrib/BBEditTextWrangler-julia.plist b/contrib/BBEditTextWrangler-julia.plist
index fb6a20cb8f1e8..b4c88a9ccfc55 100644
--- a/contrib/BBEditTextWrangler-julia.plist
+++ b/contrib/BBEditTextWrangler-julia.plist
@@ -1177,7 +1177,6 @@
using
utf16
utf32
- utf8
values
var
varm
diff --git a/doc/stdlib/collections.rst b/doc/stdlib/collections.rst
index 4e401e3cd6445..fcbe58b194c28 100644
--- a/doc/stdlib/collections.rst
+++ b/doc/stdlib/collections.rst
@@ -842,7 +842,7 @@ Given a dictionary ``D``, the syntax ``D[x]`` returns the value of key ``x`` (if
"bar" => 42.0
"foo" => 0.0
- julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
+ julia> b = Dict("baz" => 17, "bar" => 4711)
Dict{String,Int64} with 2 entries:
"bar" => 4711
"baz" => 17
diff --git a/doc/stdlib/strings.rst b/doc/stdlib/strings.rst
index fc378b859f7ba..a1bb0175c9477 100644
--- a/doc/stdlib/strings.rst
+++ b/doc/stdlib/strings.rst
@@ -68,24 +68,6 @@
Convert a string to ``String`` type and check that it contains only ASCII data, otherwise throwing an ``ArugmentError`` indicating the position of the first non-ASCII byte.
-.. function:: utf8(::Array{UInt8,1})
-
- .. Docstring generated from Julia source
-
- Create a UTF-8 string from a byte array.
-
-.. function:: utf8(::Ptr{UInt8}, [length])
-
- .. Docstring generated from Julia source
-
- Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated.
-
-.. function:: utf8(s)
-
- .. Docstring generated from Julia source
-
- Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
-
.. function:: @r_str -> Regex
.. Docstring generated from Julia source
diff --git a/test/base64.jl b/test/base64.jl
index e3e71d79a5989..1055cc1c302f9 100644
--- a/test/base64.jl
+++ b/test/base64.jl
@@ -24,7 +24,7 @@ end
rm(fname)
# Encode to string and decode
-@test utf8(base64decode(base64encode(inputText))) == inputText
+@test String(base64decode(base64encode(inputText))) == inputText
# Decode with max line chars = 76 and padding
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76))
diff --git a/test/dict.jl b/test/dict.jl
index d31da6008edb8..a9351357df14d 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -263,7 +263,7 @@ end
for d in (Dict("\n" => "\n", "1" => "\n", "\n" => "2"),
[string(i) => i for i = 1:30],
[reshape(1:i^2,i,i) => reshape(1:i^2,i,i) for i = 1:24],
- [utf8(Char['α':'α'+i;]) => utf8(Char['α':'α'+i;]) for i = (1:10)*10],
+ [String(Char['α':'α'+i;]) => String(Char['α':'α'+i;]) for i = (1:10)*10],
Dict("key" => zeros(0, 0)))
for cols in (12, 40, 80), rows in (2, 10, 24)
# Ensure output is limited as requested
diff --git a/test/replcompletions.jl b/test/replcompletions.jl
index f446004fdccbd..26a1cfb034e0d 100644
--- a/test/replcompletions.jl
+++ b/test/replcompletions.jl
@@ -569,7 +569,7 @@ c, r, res = test_scomplete(s)
withenv("PATH" => string(tempdir(), ":", dir)) do
s = string("repl-completio")
c,r = test_scomplete(s)
- @test [utf8("repl-completion")] == c
+ @test ["repl-completion"] == c
@test s[r] == "repl-completio"
end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index a45e0d7a5e08f..21bfd2bc36f46 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -215,14 +215,14 @@ end
# issue #11142
s = "abcdefghij"
sp = pointer(s)
-@test utf8(sp) == s
-@test utf8(sp,5) == "abcde"
-@test typeof(utf8(sp)) == String
+@test String(sp) == s
+@test String(sp,5) == "abcde"
+@test typeof(String(sp)) == String
s = "abcde\uff\u2000\U1f596"
sp = pointer(s)
-@test utf8(sp) == s
-@test utf8(sp,5) == "abcde"
-@test typeof(utf8(sp)) == String
+@test String(sp) == s
+@test String(sp,5) == "abcde"
+@test typeof(String(sp)) == String
@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
@test isnull(tryparse(BigInt, "1234567890-"))
@@ -434,7 +434,7 @@ let s = "abcdef", u8 = "abcdef\uff", u16 = utf16(u8), u32 = utf32(u8),
@test isvalid(u8)
@test isvalid(u16)
@test isvalid(u32)
- @test isvalid(String, u8)
+ @test isvalid(String, u8)
@test isvalid(UTF16String, u16)
@test isvalid(UTF32String, u32)
end
@@ -464,11 +464,9 @@ end
# issue # 11464: uppercase/lowercase of UTF16String becomes a String
str = "abcdef\uff\uffff\u10ffffABCDEF"
@test typeof(uppercase("abcdef")) == String
-@test typeof(uppercase(utf8(str))) == String
@test typeof(uppercase(utf16(str))) == UTF16String
@test typeof(uppercase(utf32(str))) == UTF32String
@test typeof(lowercase("ABCDEF")) == String
-@test typeof(lowercase(utf8(str))) == String
@test typeof(lowercase(utf16(str))) == UTF16String
@test typeof(lowercase(utf32(str))) == UTF32String
@@ -481,16 +479,11 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32))
@test "a".*["b","c"] == ["ab","ac"]
@test ["b","c"].*"a" == ["ba","ca"]
-@test utf8("a").*["b","c"] == ["ab","ac"]
-@test "a".*map(utf8,["b","c"]) == ["ab","ac"]
@test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"]
-# Make sure NULL pointer are handled consistently by
-# `String`, `ascii` and `utf8`
+# Make sure NULL pointers are handled consistently by String
@test_throws ArgumentError String(Ptr{UInt8}(0))
@test_throws ArgumentError String(Ptr{UInt8}(0), 10)
-@test_throws ArgumentError utf8(Ptr{UInt8}(0))
-@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)
# ascii works on ASCII strings and fails on non-ASCII strings
@test ascii("Hello, world") == "Hello, world"
diff --git a/test/strings/types.jl b/test/strings/types.jl
index 0a510b8355b4c..48991654a7a81 100644
--- a/test/strings/types.jl
+++ b/test/strings/types.jl
@@ -13,7 +13,7 @@ slen_u8str2 = length(u8str2)
@test len_u8str2 == 2 * len_u8str
@test slen_u8str2 == 2 * slen_u8str
-u8str2plain = utf8(u8str2)
+u8str2plain = String(u8str2)
for i1 = 1:length(u8str2)
if !isvalid(u8str2, i1); continue; end
@@ -93,8 +93,7 @@ u = SubString(str, 1, 5)
@test prevind(SubString("{var}",2,4),4) == 3
# issue #4183
-@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
-@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
+@test split(SubString("x", 2, 0), "y") == AbstractString[""]
# issue #6772
@test float(SubString("10",1,1)) === 1.0
@@ -132,7 +131,7 @@ let s="lorem ipsum",
end #let
#for isvalid(SubString{String})
-let s = utf8("Σx + βz - 2")
+let s = "Σx + βz - 2"
for i in -1:length(s)+2
ss=SubString(s,1,i)
@test isvalid(ss,i)==isvalid(s,i)
diff --git a/test/unicode/checkstring.jl b/test/unicode/checkstring.jl
index c19b8b9a7324c..e9bab9640950b 100644
--- a/test/unicode/checkstring.jl
+++ b/test/unicode/checkstring.jl
@@ -89,11 +89,7 @@ try
@test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0xc0])
end
- # Long encoding of 0x01
- @test_throws UnicodeError utf8(b"\xf0\x80\x80\x80")
- # Test ends of long encoded surrogates
- @test_throws UnicodeError utf8(b"\xf0\x8d\xa0\x80")
- @test_throws UnicodeError utf8(b"\xf0\x8d\xbf\xbf")
+ # Long encodings
@test_throws UnicodeError Base.checkstring(b"\xf0\x80\x80\x80")
@test Base.checkstring(b"\xc0\x81"; accept_long_char=true) == (1,0x1,0,0,0)
@test Base.checkstring(b"\xf0\x80\x80\x80"; accept_long_char=true) == (1,0x1,0,0,0)
diff --git a/test/unicode/utf16.jl b/test/unicode/utf16.jl
index 6e0a0b14ec03f..1c8e31cdece98 100644
--- a/test/unicode/utf16.jl
+++ b/test/unicode/utf16.jl
@@ -6,7 +6,7 @@ u16 = utf16(u8)
@test sizeof(u16) == 18
@test length(u16.data) == 10 && u16.data[end] == 0
@test length(u16) == 5
-@test utf8(u16) == u8
+@test String(u16) == u8
@test collect(u8) == collect(u16)
@test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
@test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))
diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl
index 8165a6aaecc6d..b0142090659bf 100644
--- a/test/unicode/utf32.jl
+++ b/test/unicode/utf32.jl
@@ -6,7 +6,7 @@ u32 = utf32(u8)
@test sizeof(u32) == 20
@test length(u32.data) == 6 && u32.data[end] == 0
@test length(u32) == 5
-@test utf8(u32) == u8
+@test String(u32) == u8
@test collect(u8) == collect(u32)
@test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
@test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))
@@ -16,9 +16,9 @@ u32 = utf32(u8)
function tstcvt(strUTF8::String, strUTF16::UTF16String, strUTF32::UTF32String)
@test utf16(strUTF8) == strUTF16
@test utf32(strUTF8) == strUTF32
- @test utf8(strUTF16) == strUTF8
+ @test String(strUTF16) == strUTF8
@test utf32(strUTF16) == strUTF32
- @test utf8(strUTF32) == strUTF8
+ @test String(strUTF32) == strUTF8
@test utf16(strUTF32) == strUTF16
end
@@ -49,7 +49,7 @@ str3_UTF32 = utf32(str3_UTF8)
str4_UTF32 = utf32(str4_UTF8)
strS_UTF32 = utf32(strS_UTF8)
-@test utf8(strAscii) == strAscii
+@test String(strAscii) == strAscii
@test utf16(strAscii) == strAscii
@test utf32(strAscii) == strAscii
@@ -62,13 +62,12 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32)
# Test converting surrogate pairs
@test utf16(strS_UTF8) == strC_UTF8
@test utf32(strS_UTF8) == strC_UTF8
-@test utf8(strS_UTF16) == strC_UTF8
+@test String(strS_UTF16) == strC_UTF8
@test utf32(strS_UTF16) == strC_UTF8
-@test utf8(strS_UTF32) == strC_UTF8
+@test String(strS_UTF32) == strC_UTF8
@test utf16(strS_UTF32) == strC_UTF8
# Test converting overlong \0
-@test utf8(strZ) == strz_UTF8
@test utf16(String(strZ)) == strz_UTF8
@test utf32(String(strZ)) == strz_UTF8
@@ -172,7 +171,7 @@ end
# Wstring
u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
w = wstring(u8)
-@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
+@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w)
@test u8 == WString(w.data)
# 12268
@@ -211,7 +210,7 @@ end
# Test pointer() functions
let str = ascii("this ")
- u8 = utf8(str)
+ u8 = String(str)
u16 = utf16(str)
u32 = utf32(str)
pa = pointer(str)
diff --git a/test/unicode/utf8.jl b/test/unicode/utf8.jl
index 073f5f2b4d29c..c3037a7d624df 100644
--- a/test/unicode/utf8.jl
+++ b/test/unicode/utf8.jl
@@ -5,7 +5,7 @@
let ch = 0x10000
for hichar = 0xd800:0xdbff
for lochar = 0xdc00:0xdfff
- @test convert(String, utf8(Char[hichar, lochar]).data) == string(Char(ch))
+ @test convert(String, String(Char[hichar, lochar]).data) == string(Char(ch))
ch += 1
end
end
diff --git a/test/unicode/utf8proc.jl b/test/unicode/utf8proc.jl
index 6ba0ac6f4e812..2a829a5717d55 100644
--- a/test/unicode/utf8proc.jl
+++ b/test/unicode/utf8proc.jl
@@ -234,7 +234,7 @@ let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
"\U1d4c1\u0300"]),
("x",["x"]),
("abc",["a","b","c"]))
- for T in (utf8,utf16,utf32)
+ for T in (String,utf16,utf32)
for nf in (:NFC, :NFD)
for (s, g) in grphtest
s_ = T(normalize_string(s, nf))