Deprecate isnumber(), is_assigned_char() and normalize_string()

nalimilan · nalimilan · commit 756936abe846 · 2017-12-12T22:28:41.000+01:00
isnumeric() is consistent with Python and Rust (but not Go), and less easy to confuse
with isdigit(). Improve documentation to make confusion less likely. Also fix a few uses
where isdigit() is more appropriate than isnumber().
diff --git a/NEWS.md b/NEWS.md
@@ -744,6 +744,10 @@ Deprecated or removed
     `isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
     `isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.
 
+  * `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
+    in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three
+    in the new `Unicode` standard library module ([#25021]).
+
 Command-line option changes
 ---------------------------
 
diff --git a/base/client.jl b/base/client.jl
@@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString)
         s = split(line, '*'; keep = false)
         map!(strip, s, s)
         if length(s) > 1
-            cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
+            cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
             push!(machines,(s[2], cnt))
         else
             push!(machines,line)
diff --git a/base/distributed/Distributed.jl b/base/distributed/Distributed.jl
@@ -15,7 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
             binding_module, notify_error, atexit, julia_exename, julia_cmd,
             AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
             shell_escape_posixly, uv_error
-using Base.Unicode: isascii, isdigit, isnumber
+using Base.Unicode: isascii, isdigit, isnumeric
 
 # NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use
 
diff --git a/base/loading.jl b/base/loading.jl
@@ -69,7 +69,7 @@ elseif Sys.isapple()
         # If there is no match, it's possible that the file does exist but HFS+
         # performed unicode normalization. See  https://developer.apple.com/library/mac/qa/qa1235/_index.html.
         Unicode.isascii(path_basename) && return false
-        Vector{UInt8}(Unicode.normalize_string(path_basename, :NFD)) == casepreserved_basename
+        Vector{UInt8}(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
     end
 else
     # Generic fallback that performs a slow directory listing.
diff --git a/base/precompile.jl b/base/precompile.jl
@@ -68,7 +68,7 @@ precompile(Tuple{typeof(Base.lstrip), Base.SubString{String}, Array{Char, 1}})
 precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), String, Char})
 precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), Base.SubString{String}, Char})
 precompile(Tuple{typeof(Base.map!), typeof(Base.strip), Array{Base.SubString{String}, 1}, Array{Base.SubString{String}, 1}})
-precompile(Tuple{typeof(Base.Unicode.isnumber), Base.SubString{String}})
+precompile(Tuple{typeof(Base.Unicode.isnumeric), Base.SubString{String}})
 precompile(Tuple{Type{Core.Inference.Generator{I, F} where F where I}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
 precompile(Tuple{Type{Core.Inference.Generator{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}, Type{Core.Inference.Const}}}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
 precompile(Tuple{typeof(Core.Inference.convert), Type{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
diff --git a/base/regex.jl b/base/regex.jl
@@ -338,11 +338,11 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
             if repl[next_i] == SUB_CHAR
                 write(io, SUB_CHAR)
                 i = nextind(repl, next_i)
-            elseif Unicode.isnumber(repl[next_i])
+            elseif Unicode.isdigit(repl[next_i])
                 group = parse(Int, repl[next_i])
                 i = nextind(repl, next_i)
                 while i <= e
-                    if Unicode.isnumber(repl[i])
+                    if Unicode.isdigit(repl[i])
                         group = 10group + parse(Int, repl[i])
                         i = nextind(repl, i)
                     else
@@ -364,7 +364,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
                 end
                 #  TODO: avoid this allocation
                 groupname = SubString(repl, groupstart, prevind(repl, i))
-                if all(Unicode.isnumber,groupname)
+                if all(Unicode.isdigit, groupname)
                     _write_capture(io, re, parse(Int, groupname))
                 else
                     group = PCRE.substring_number_from_name(re.regex, groupname)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
@@ -148,7 +148,7 @@ end
 
 utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)
 
-function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
+function normalize(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
     flags = 0
     stable && (flags = flags | UTF8PROC_STABLE)
     compat && (flags = flags | UTF8PROC_COMPAT)
@@ -173,7 +173,7 @@ function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=fa
 end
 
 """
-    normalize_string(s::AbstractString, normalform::Symbol)
+    Unicode.normalize(s::AbstractString, normalform::Symbol)
 
 Normalize the string `s` according to one of the four "normal forms" of the Unicode
 standard: `normalform` can be `:NFC`, `:NFD`, `:NFKC`, or `:NFKD`.  Normal forms C
@@ -185,7 +185,7 @@ canonical choice (e.g. they expand ligatures into the individual characters), wi
 being more compact.
 
 Alternatively, finer control and additional transformations may be be obtained by calling
-`normalize_string(s; keywords...)`, where any number of the following boolean keywords
+`Unicode.normalize(s; keywords...)`, where any number of the following boolean keywords
 options (which all default to `false` except for `compose`) are specified:
 
 * `compose=false`: do not perform canonical composition
@@ -211,17 +211,17 @@ For example, NFKC corresponds to the options `compose=true, compat=true, stable=
 ```jldoctest
 julia> using Unicode
 
-julia> "μ" == normalize_string("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
+julia> "μ" == normalize("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
 true
 
-julia> normalize_string("JuLiA", casefold=true)
+julia> normalize("JuLiA", casefold=true)
 "julia"
 
-julia> normalize_string("JúLiA", stripmark=true)
+julia> normalize("JúLiA", stripmark=true)
 "JuLiA"
 ```
 """
-function normalize_string(s::AbstractString, nf::Symbol)
+function normalize(s::AbstractString, nf::Symbol)
     utf8proc_map(s, nf == :NFC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE) :
                     nf == :NFD ? (UTF8PROC_STABLE | UTF8PROC_DECOMPOSE) :
                     nf == :NFKC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE
@@ -281,22 +281,22 @@ category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UI
 category_string(c) = category_strings[category_code(c)+1]
 
 """
-    is_assigned_char(c) -> Bool
+    Unicode.isassigned(c) -> Bool
 
 Returns `true` if the given char or integer is an assigned Unicode code point.
 
 # Examples
 ```jldoctest
 julia> using Unicode
 
-julia> is_assigned_char(101)
+julia> isassigned(101)
 true
 
-julia> is_assigned_char('\\x01')
+julia> isassigned('\\x01')
 true
 ```
 """
-is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
+isassigned(c) = category_code(c) != UTF8PROC_CATEGORY_CN
 
 ## libc character class predicates ##
 
@@ -354,7 +354,7 @@ end
 """
     isdigit(c::Char) -> Bool
 
-Tests whether a character is a numeric digit (0-9).
+Tests whether a character is a decimal digit (0-9).
 
 # Examples
 ```jldoctest
@@ -396,27 +396,33 @@ false
 isalpha(c::Char)  = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO)
 
 """
-    isnumber(c::Char) -> Bool
+    isnumeric(c::Char) -> Bool
 
 Tests whether a character is numeric.
 A character is classified as numeric if it belongs to the Unicode general category Number,
 i.e. a character whose category code begins with 'N'.
 
+Note that this broad category includes characters such as ¾ and ௰.
+Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9.
+
 # Examples
 ```jldoctest
 julia> using Unicode
 
-julia> isnumber('9')
+julia> isnumeric('௰')
+true
+
+julia> isnumeric('9')
 true
 
-julia> isnumber('α')
+julia> isnumeric('α')
 false
 
-julia> isnumber('❤')
+julia> isnumeric('❤')
 false
 ```
 """
-isnumber(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
+isnumeric(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
 
 """
     isalnum(c::Char) -> Bool
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
@@ -617,8 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli
 
 Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference
 (variable, object field, or array element) is uninitialized, accessing it will immediately throw
-an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref)
-functions.
+an error. This situation can be detected using the [`isdefined`](@ref) or
+[`isassigned`](@ref Base.isassigned) functions.
 
 Some functions are used only for their side effects, and do not need to return a value. In these
 cases, the convention is to return the value `nothing`, which is just a singleton object of type
@@ -627,7 +627,7 @@ this convention, and that the REPL does not print anything for it. Some language
 would not otherwise have a value also yield `nothing`, for example `if false; end`.
 
 To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the
-[`missing`](@ref) object. See the [`Missing Values|](@ref missing) section for more details.
+[`missing`](@ref) object. See the [`Missing Values`](@ref missing) section for more details.
 
 The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of
 as nothing but rather a tuple of zero values.
diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md
@@ -1,8 +1,8 @@
 # Unicode
 
 ```@docs
-Unicode.is_assigned_char
-Unicode.normalize_string
+Unicode.isassigned
+Unicode.normalize
 Unicode.graphemes
 Unicode.uppercase
 Unicode.lowercase
@@ -16,7 +16,7 @@ Unicode.iscntrl
 Unicode.isdigit
 Unicode.isgraph
 Unicode.islower
-Unicode.isnumber
+Unicode.isnumeric
 Unicode.isprint
 Unicode.ispunct
 Unicode.isspace
diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
@@ -4,14 +4,23 @@ __precompile__(true)
 
 module Unicode
 
-using Base.Unicode: normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
-                    islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
+using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
+                    islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
                     iscntrl, ispunct, isspace, isprint, isgraph,
                     lowercase, uppercase, titlecase, lcfirst, ucfirst
 
-export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
-       islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
+export normalize, graphemes, isassigned, textwidth, isvalid,
+       islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
        iscntrl, ispunct, isspace, isprint, isgraph,
        lowercase, uppercase, titlecase, lcfirst, ucfirst
 
+# BEGIN 0.7 deprecations
+
+@deprecate isnumber(c::Char) Unicode.isnumeric(c)
+@deprecate is_assigned_char(c::Char) Unicode.isassigned(c)
+@deprecate normalize_string(s::AbstractString, nf::Symbol; kwargs...) Unicode.normalize(s, nf; kwargs...)
+@deprecate normalize_string(s::AbstractString; kwargs...) Unicode.normalize(s; kwargs...)
+
+# END 0.7 deprecations
+
 end
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
diff --git a/test/strings/basic.jl b/test/strings/basic.jl