Skip to content

Commit 756936a

Browse files
committed
Deprecate isnumber(), is_assigned_char() and normalize_string()
isnumeric() is consistent with Python and Rust (but not Go), and less easy to confuse with isdigit(). Improve documentation to make confusion less easy. Also fix a few uses where isdigit() is more appropriate than isnumber().
1 parent 635826b commit 756936a

File tree

12 files changed

+114
-94
lines changed

12 files changed

+114
-94
lines changed

NEWS.md

+4
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,10 @@ Deprecated or removed
744744
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
745745
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.
746746

747+
* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
748+
in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three
749+
in the new `Unicode` standard library module ([#25021]).
750+
747751
Command-line option changes
748752
---------------------------
749753

base/client.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString)
361361
s = split(line, '*'; keep = false)
362362
map!(strip, s, s)
363363
if length(s) > 1
364-
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
364+
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
365365
push!(machines,(s[2], cnt))
366366
else
367367
push!(machines,line)

base/distributed/Distributed.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
1515
binding_module, notify_error, atexit, julia_exename, julia_cmd,
1616
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
1717
shell_escape_posixly, uv_error
18-
using Base.Unicode: isascii, isdigit, isnumber
18+
using Base.Unicode: isascii, isdigit, isnumeric
1919

2020
# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use
2121

base/loading.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ elseif Sys.isapple()
6969
# If there is no match, it's possible that the file does exist but HFS+
7070
# performed unicode normalization. See https://developer.apple.com/library/mac/qa/qa1235/_index.html.
7171
Unicode.isascii(path_basename) && return false
72-
Vector{UInt8}(Unicode.normalize_string(path_basename, :NFD)) == casepreserved_basename
72+
Vector{UInt8}(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
7373
end
7474
else
7575
# Generic fallback that performs a slow directory listing.

base/precompile.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ precompile(Tuple{typeof(Base.lstrip), Base.SubString{String}, Array{Char, 1}})
6868
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), String, Char})
6969
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), Base.SubString{String}, Char})
7070
precompile(Tuple{typeof(Base.map!), typeof(Base.strip), Array{Base.SubString{String}, 1}, Array{Base.SubString{String}, 1}})
71-
precompile(Tuple{typeof(Base.Unicode.isnumber), Base.SubString{String}})
71+
precompile(Tuple{typeof(Base.Unicode.isnumeric), Base.SubString{String}})
7272
precompile(Tuple{Type{Core.Inference.Generator{I, F} where F where I}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
7373
precompile(Tuple{Type{Core.Inference.Generator{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}, Type{Core.Inference.Const}}}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
7474
precompile(Tuple{typeof(Core.Inference.convert), Type{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})

base/regex.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -338,11 +338,11 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
338338
if repl[next_i] == SUB_CHAR
339339
write(io, SUB_CHAR)
340340
i = nextind(repl, next_i)
341-
elseif Unicode.isnumber(repl[next_i])
341+
elseif Unicode.isdigit(repl[next_i])
342342
group = parse(Int, repl[next_i])
343343
i = nextind(repl, next_i)
344344
while i <= e
345-
if Unicode.isnumber(repl[i])
345+
if Unicode.isdigit(repl[i])
346346
group = 10group + parse(Int, repl[i])
347347
i = nextind(repl, i)
348348
else
@@ -364,7 +364,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
364364
end
365365
# TODO: avoid this allocation
366366
groupname = SubString(repl, groupstart, prevind(repl, i))
367-
if all(Unicode.isnumber,groupname)
367+
if all(Unicode.isdigit, groupname)
368368
_write_capture(io, re, parse(Int, groupname))
369369
else
370370
group = PCRE.substring_number_from_name(re.regex, groupname)

base/strings/unicode.jl

+23-17
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ end
148148

149149
utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)
150150

151-
function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
151+
function normalize(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
152152
flags = 0
153153
stable && (flags = flags | UTF8PROC_STABLE)
154154
compat && (flags = flags | UTF8PROC_COMPAT)
@@ -173,7 +173,7 @@ function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=fa
173173
end
174174

175175
"""
176-
normalize_string(s::AbstractString, normalform::Symbol)
176+
Unicode.normalize(s::AbstractString, normalform::Symbol)
177177
178178
Normalize the string `s` according to one of the four "normal forms" of the Unicode
179179
standard: `normalform` can be `:NFC`, `:NFD`, `:NFKC`, or `:NFKD`. Normal forms C
@@ -185,7 +185,7 @@ canonical choice (e.g. they expand ligatures into the individual characters), wi
185185
being more compact.
186186
187187
Alternatively, finer control and additional transformations may be obtained by calling
188-
`normalize_string(s; keywords...)`, where any number of the following boolean keywords
188+
`Unicode.normalize(s; keywords...)`, where any number of the following boolean keyword
189189
options (which all default to `false` except for `compose`) are specified:
190190
191191
* `compose=false`: do not perform canonical composition
@@ -211,17 +211,17 @@ For example, NFKC corresponds to the options `compose=true, compat=true, stable=
211211
```jldoctest
212212
julia> using Unicode
213213
214-
julia> "μ" == normalize_string("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
214+
julia> "μ" == normalize("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
215215
true
216216
217-
julia> normalize_string("JuLiA", casefold=true)
217+
julia> normalize("JuLiA", casefold=true)
218218
"julia"
219219
220-
julia> normalize_string("JúLiA", stripmark=true)
220+
julia> normalize("JúLiA", stripmark=true)
221221
"JuLiA"
222222
```
223223
"""
224-
function normalize_string(s::AbstractString, nf::Symbol)
224+
function normalize(s::AbstractString, nf::Symbol)
225225
utf8proc_map(s, nf == :NFC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE) :
226226
nf == :NFD ? (UTF8PROC_STABLE | UTF8PROC_DECOMPOSE) :
227227
nf == :NFKC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE
@@ -281,22 +281,22 @@ category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UI
281281
category_string(c) = category_strings[category_code(c)+1]
282282

283283
"""
284-
is_assigned_char(c) -> Bool
284+
Unicode.isassigned(c) -> Bool
285285
286286
Returns `true` if the given char or integer is an assigned Unicode code point.
287287
288288
# Examples
289289
```jldoctest
290290
julia> using Unicode
291291
292-
julia> is_assigned_char(101)
292+
julia> isassigned(101)
293293
true
294294
295-
julia> is_assigned_char('\\x01')
295+
julia> isassigned('\\x01')
296296
true
297297
```
298298
"""
299-
is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
299+
isassigned(c) = category_code(c) != UTF8PROC_CATEGORY_CN
300300

301301
## libc character class predicates ##
302302

@@ -354,7 +354,7 @@ end
354354
"""
355355
isdigit(c::Char) -> Bool
356356
357-
Tests whether a character is a numeric digit (0-9).
357+
Tests whether a character is a decimal digit (0-9).
358358
359359
# Examples
360360
```jldoctest
@@ -396,27 +396,33 @@ false
396396
isalpha(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO)
397397

398398
"""
399-
isnumber(c::Char) -> Bool
399+
isnumeric(c::Char) -> Bool
400400
401401
Tests whether a character is numeric.
402402
A character is classified as numeric if it belongs to the Unicode general category Number,
403403
i.e. a character whose category code begins with 'N'.
404404
405+
Note that this broad category includes characters such as ¾ and ௰.
406+
Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9.
407+
405408
# Examples
406409
```jldoctest
407410
julia> using Unicode
408411
409-
julia> isnumber('9')
412+
julia> isnumeric('௰')
413+
true
414+
415+
julia> isnumeric('9')
410416
true
411417
412-
julia> isnumber('α')
418+
julia> isnumeric('α')
413419
false
414420
415-
julia> isnumber('❤')
421+
julia> isnumeric('❤')
416422
false
417423
```
418424
"""
419-
isnumber(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
425+
isnumeric(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
420426

421427
"""
422428
isalnum(c::Char) -> Bool

doc/src/manual/faq.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli
617617

618618
Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference
619619
(variable, object field, or array element) is uninitialized, accessing it will immediately throw
620-
an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref)
621-
functions.
620+
an error. This situation can be detected using the [`isdefined`](@ref) or
621+
[`isassigned`](@ref Base.isassigned) functions.
622622

623623
Some functions are used only for their side effects, and do not need to return a value. In these
624624
cases, the convention is to return the value `nothing`, which is just a singleton object of type
@@ -627,7 +627,7 @@ this convention, and that the REPL does not print anything for it. Some language
627627
would not otherwise have a value also yield `nothing`, for example `if false; end`.
628628

629629
To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the
630-
[`missing`](@ref) object. See the [`Missing Values|](@ref missing) section for more details.
630+
[`missing`](@ref) object. See the [`Missing Values`](@ref missing) section for more details.
631631

632632
The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of
633633
as nothing but rather a tuple of zero values.

stdlib/Unicode/docs/src/index.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Unicode
22

33
```@docs
4-
Unicode.is_assigned_char
5-
Unicode.normalize_string
4+
Unicode.isassigned
5+
Unicode.normalize
66
Unicode.graphemes
77
Unicode.uppercase
88
Unicode.lowercase
@@ -16,7 +16,7 @@ Unicode.iscntrl
1616
Unicode.isdigit
1717
Unicode.isgraph
1818
Unicode.islower
19-
Unicode.isnumber
19+
Unicode.isnumeric
2020
Unicode.isprint
2121
Unicode.ispunct
2222
Unicode.isspace

stdlib/Unicode/src/Unicode.jl

+13-4
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,23 @@ __precompile__(true)
44

55
module Unicode
66

7-
using Base.Unicode: normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
8-
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
7+
using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
8+
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
99
iscntrl, ispunct, isspace, isprint, isgraph,
1010
lowercase, uppercase, titlecase, lcfirst, ucfirst
1111

12-
export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
13-
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
12+
export normalize, graphemes, isassigned, textwidth, isvalid,
13+
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
1414
iscntrl, ispunct, isspace, isprint, isgraph,
1515
lowercase, uppercase, titlecase, lcfirst, ucfirst
1616

17+
# BEGIN 0.7 deprecations
18+
19+
@deprecate isnumber(c::Char) Unicode.isnumeric(c)
20+
@deprecate is_assigned_char(c::Char) Unicode.isassigned(c)
21+
@deprecate normalize_string(s::AbstractString, nf::Symbol; kwargs...) Unicode.normalize(s, nf; kwargs...)
22+
@deprecate normalize_string(s::AbstractString; kwargs...) Unicode.normalize(s; kwargs...)
23+
24+
# END 0.7 deprecations
25+
1726
end

0 commit comments

Comments
 (0)