diff --git a/base/ascii.jl b/base/ascii.jl deleted file mode 100644 index bb5dd899d52a1..0000000000000 --- a/base/ascii.jl +++ /dev/null @@ -1,7 +0,0 @@ -# This file is a part of Julia. License is MIT: http://julialang.org/license - -ascii(x) = ascii(convert(String, x)) -ascii(p::Ptr{UInt8}) = ascii(bytestring(p)) -ascii(p::Ptr{UInt8}, len::Integer) = ascii(bytestring(p, len)) -ascii(s::String) = byte_string_classify(s.data) == 1 ? s : - throw(ArgumentError("invalid ASCII sequence")) diff --git a/base/deprecated.jl b/base/deprecated.jl index 07d4944005fb1..348cce6cdfd27 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1155,6 +1155,10 @@ end @deprecate_binding UTF8String String @deprecate_binding ByteString String +@deprecate ascii(p::Ptr{UInt8}, len::Integer) ascii(bytestring(p, len)) +@deprecate ascii(p::Ptr{UInt8}) ascii(bytestring(p)) +@deprecate ascii(x) ascii(string(x)) + @deprecate ==(x::Char, y::Integer) UInt32(x) == y @deprecate ==(x::Integer, y::Char) x == UInt32(y) @deprecate isless(x::Char, y::Integer) UInt32(x) < y diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl index 5252a07cc526d..480d57f73e2c5 100644 --- a/base/docs/helpdb/Base.jl +++ b/base/docs/helpdb/Base.jl @@ -8719,28 +8719,13 @@ alias the input `x` to modify it in-place. filt! """ - ascii(::Array{UInt8,1}) + ascii(s::AbstractString) -Create an ASCII string from a byte array. -""" -ascii(::Vector{UInt8}) - -""" - ascii(s) - -Convert a string to a contiguous ASCII string (all characters must be valid ASCII characters). +Convert a string to `String` type and check that it contains only ASCII data, otherwise +throwing an `ArgumentError` indicating the position of the first non-ASCII byte. """ ascii(s) -""" - ascii(::Ptr{UInt8}, [length]) - -Create an ASCII string from the address of a C (0-terminated) string encoded in ASCII. A -copy is made; the ptr can be safely freed. If `length` is specified, the string does not -have to be 0-terminated. -""" -ascii(::Ptr{UInt8},?) 
- """ maxabs(itr) diff --git a/base/printf.jl b/base/printf.jl index c2882d65452c1..448b845fc869d 100644 --- a/base/printf.jl +++ b/base/printf.jl @@ -98,7 +98,7 @@ function parse1(s::AbstractString, k::Integer) while c in "#0- + '" c, k = next_or_die(s,k) end - flags = ascii(s[j:k-2]) + flags = String(s[j:k-2]) # parse width while '0' <= c <= '9' width = 10*width + c-'0' diff --git a/base/socket.jl b/base/socket.jl index 9f8bc27c3b6a6..2c5ee4e3e81af 100644 --- a/base/socket.jl +++ b/base/socket.jl @@ -586,6 +586,7 @@ function uv_getaddrinfocb(req::Ptr{Void}, status::Cint, addrinfo::Ptr{Void}) end function getaddrinfo(cb::Function, host::String) + isascii(host) || error("non-ASCII hostname: $host") callback_dict[cb] = cb status = ccall(:jl_getaddrinfo, Int32, (Ptr{Void}, Cstring, Ptr{UInt8}, Any, Ptr{Void}), eventloop(), host, C_NULL, cb, uv_jl_getaddrinfocb::Ptr{Void}) @@ -598,10 +599,9 @@ function getaddrinfo(cb::Function, host::String) end return nothing end -getaddrinfo(cb::Function, host::AbstractString) = getaddrinfo(cb,ascii(host)) +getaddrinfo(cb::Function, host::AbstractString) = getaddrinfo(cb, String(host)) function getaddrinfo(host::String) - isascii(host) || error("non-ASCII hostname: $host") c = Condition() getaddrinfo(host) do IP notify(c,IP) diff --git a/base/strings/util.jl b/base/strings/util.jl index 726339d529955..38c2cd9e5f82b 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -243,3 +243,13 @@ function bytes2hex(a::AbstractArray{UInt8}) end return String(b) end + +# check for pure ASCII-ness + +function ascii(s::String) + for (i, b) in enumerate(s.data) + b < 0x80 || throw(ArgumentError("invalid ASCII at index $i in $(repr(s))")) + end + return s +end +ascii(x::AbstractString) = ascii(convert(String, x)) diff --git a/base/sysimg.jl b/base/sysimg.jl index aed8231efd4c9..8a9e4d264d590 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -125,7 +125,6 @@ include("iobuffer.jl") # strings & printing include("char.jl") 
-include("ascii.jl") include("string.jl") include("unicode.jl") include("parse.jl") diff --git a/doc/stdlib/strings.rst b/doc/stdlib/strings.rst index f05af60aba917..1e38401839a13 100644 --- a/doc/stdlib/strings.rst +++ b/doc/stdlib/strings.rst @@ -62,24 +62,12 @@ Convert a string to a contiguous byte array representation appropriate for passing it to C functions. The string will be encoded as either ASCII or UTF-8. -.. function:: ascii(::Array{UInt8,1}) - - .. Docstring generated from Julia source - - Create an ASCII string from a byte array. - -.. function:: ascii(s) +.. function:: ascii(s::AbstractString) .. Docstring generated from Julia source Convert a string to a contiguous ASCII string (all characters must be valid ASCII characters). -.. function:: ascii(::Ptr{UInt8}, [length]) - - .. Docstring generated from Julia source - - Create an ASCII string from the address of a C (0-terminated) string encoded in ASCII. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated. - .. function:: utf8(::Array{UInt8,1}) .. Docstring generated from Julia source diff --git a/examples/clustermanager/simple/UnixDomainCM.jl b/examples/clustermanager/simple/UnixDomainCM.jl index a67f68692f323..8adc39cdd5555 100644 --- a/examples/clustermanager/simple/UnixDomainCM.jl +++ b/examples/clustermanager/simple/UnixDomainCM.jl @@ -44,7 +44,7 @@ function connect(manager::UnixDomainCM, pid::Int, config::WorkerConfig) if isa(address, Tuple) sock = connect(address...) 
else - sock = connect(ascii(address)) + sock = connect(address) end return (sock, sock) catch e @@ -62,7 +62,7 @@ end function start_worker(sockname, cookie) Base.init_worker(cookie, UnixDomainCM(0)) - srvr = listen(ascii(sockname)) + srvr = listen(sockname) while true sock = accept(srvr) Base.process_messages(sock, sock) diff --git a/examples/lru_test.jl b/examples/lru_test.jl index cadf5212d3d67..57fca9e097980 100644 --- a/examples/lru_test.jl +++ b/examples/lru_test.jl @@ -5,7 +5,7 @@ using LRUExample TestLRU = LRUExample.UnboundedLRU{String, String}() TestBLRU = LRUExample.BoundedLRU{String, String}(1000) -get_str(i) = ascii(vcat(map(x->[x>>4; x&0x0F], reinterpret(UInt8, [Int32(i)]))...)) +get_str(i) = String(vcat(map(x->[x>>4; x&0x0F], reinterpret(UInt8, [Int32(i)]))...)) isbounded{L<:LRUExample.LRU}(::Type{L}) = any(map(n->n==:maxsize, fieldnames(L))) isbounded{L<:LRUExample.LRU}(l::L) = isbounded(L) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 99cab256668ef..a1d295c03393a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -216,17 +216,13 @@ end # issue #11142 s = "abcdefghij" sp = pointer(s) -@test ascii(sp) == s -@test ascii(sp,5) == "abcde" -@test typeof(ascii(sp)) == String +@test utf8(sp) == s +@test utf8(sp,5) == "abcde" @test typeof(utf8(sp)) == String s = "abcde\uff\u2000\U1f596" sp = pointer(s) @test utf8(sp) == s @test utf8(sp,5) == "abcde" -@test_throws ArgumentError ascii(sp) -@test ascii(sp, 5) == "abcde" -@test_throws ArgumentError ascii(sp, 6) @test typeof(utf8(sp)) == String @test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890) @@ -494,7 +490,13 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32)) # `bytestring`, `ascii` and `utf8` @test_throws ArgumentError bytestring(Ptr{UInt8}(0)) @test_throws ArgumentError bytestring(Ptr{UInt8}(0), 10) -@test_throws ArgumentError ascii(Ptr{UInt8}(0)) -@test_throws ArgumentError ascii(Ptr{UInt8}(0), 10) @test_throws ArgumentError utf8(Ptr{UInt8}(0)) @test_throws 
ArgumentError utf8(Ptr{UInt8}(0), 10) + +# ascii works on ASCII strings and fails on non-ASCII strings +@test ascii("Hello, world") == "Hello, world" +@test typeof(ascii("Hello, world")) == String +@test ascii(utf32("Hello, world")) == "Hello, world" +@test typeof(ascii(utf32("Hello, world"))) == String +@test_throws ArgumentError ascii("Hello, ∀") +@test_throws ArgumentError ascii(utf32("Hello, ∀"))