Skip to content

Commit e04b7dc

Browse files
committed
Deprecate isnumber(), is_assigned_char() and normalize_string()
isnumeric() is consistent with Python and Rust (but not Go), and less easy to confuse with isdigit(). Improve documentation to make confusion less easy. Also fix a few uses where isdigit() is more appropriate than isnumber().
1 parent c076efa commit e04b7dc

File tree

11 files changed

+112
-94
lines changed

11 files changed

+112
-94
lines changed

NEWS.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,10 @@ Deprecated or removed
744744
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
745745
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.
746746

747+
* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
748+
in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three
749+
in the new `Unicode` standard library module ([#25021]).
750+
747751
Command-line option changes
748752
---------------------------
749753

@@ -1708,7 +1712,7 @@ Command-line option changes
17081712
[#24221]: https://github.com/JuliaLang/julia/issues/24221
17091713
[#24240]: https://github.com/JuliaLang/julia/issues/24240
17101714
[#24245]: https://github.com/JuliaLang/julia/issues/24245
1711-
[#24250]: https://github.com/JuliaLang/julia/issues/24250
1715+
[#24250]: https://github.com/JuliaLang/julia/issues/24250
17121716
[#24263]: https://github.com/JuliaLang/julia/issues/24263
17131717
[#24279]: https://github.com/JuliaLang/julia/issues/24279
17141718
[#24281]: https://github.com/JuliaLang/julia/issues/24281

base/client.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString)
361361
s = split(line, '*'; keep = false)
362362
map!(strip, s, s)
363363
if length(s) > 1
364-
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
364+
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
365365
push!(machines,(s[2], cnt))
366366
else
367367
push!(machines,line)

base/distributed/Distributed.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
1515
binding_module, notify_error, atexit, julia_exename, julia_cmd,
1616
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
1717
shell_escape_posixly, uv_error
18-
using Base.UTF8proc: isascii, isdigit, isnumber
18+
using Base.UTF8proc: isascii, isdigit, isnumeric
1919

2020
# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use
2121

base/precompile.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ precompile(Tuple{typeof(Base.lstrip), Base.SubString{String}, Array{Char, 1}})
6868
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), String, Char})
6969
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), Base.SubString{String}, Char})
7070
precompile(Tuple{typeof(Base.map!), typeof(Base.strip), Array{Base.SubString{String}, 1}, Array{Base.SubString{String}, 1}})
71-
precompile(Tuple{typeof(Base.UTF8proc.isnumber), Base.SubString{String}})
71+
precompile(Tuple{typeof(Base.UTF8proc.isnumeric), Base.SubString{String}})
7272
precompile(Tuple{Type{Core.Inference.Generator{I, F} where F where I}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
7373
precompile(Tuple{Type{Core.Inference.Generator{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}, Type{Core.Inference.Const}}}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
7474
precompile(Tuple{typeof(Core.Inference.convert), Type{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})

base/regex.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -338,11 +338,11 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
338338
if repl[next_i] == SUB_CHAR
339339
write(io, SUB_CHAR)
340340
i = nextind(repl, next_i)
341-
elseif UTF8proc.isnumber(repl[next_i])
341+
elseif UTF8proc.isdigit(repl[next_i])
342342
group = parse(Int, repl[next_i])
343343
i = nextind(repl, next_i)
344344
while i <= e
345-
if UTF8proc.isnumber(repl[i])
345+
if UTF8proc.isdigit(repl[i])
346346
group = 10group + parse(Int, repl[i])
347347
i = nextind(repl, i)
348348
else
@@ -364,7 +364,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
364364
end
365365
# TODO: avoid this allocation
366366
groupname = SubString(repl, groupstart, prevind(repl, i))
367-
if all(UTF8proc.isnumber,groupname)
367+
if all(UTF8proc.isdigit, groupname)
368368
_write_capture(io, re, parse(Int, groupname))
369369
else
370370
group = PCRE.substring_number_from_name(re.regex, groupname)

base/strings/utf8proc.jl

+23-17
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ end
148148

149149
utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)
150150

151-
function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
151+
function normalize(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
152152
flags = 0
153153
stable && (flags = flags | UTF8PROC_STABLE)
154154
compat && (flags = flags | UTF8PROC_COMPAT)
@@ -173,7 +173,7 @@ function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=fa
173173
end
174174

175175
"""
176-
normalize_string(s::AbstractString, normalform::Symbol)
176+
normalize(s::AbstractString, normalform::Symbol)
177177
178178
Normalize the string `s` according to one of the four "normal forms" of the Unicode
179179
standard: `normalform` can be `:NFC`, `:NFD`, `:NFKC`, or `:NFKD`. Normal forms C
@@ -185,7 +185,7 @@ canonical choice (e.g. they expand ligatures into the individual characters), wi
185185
being more compact.
186186
187187
Alternatively, finer control and additional transformations may be obtained by calling
188-
`normalize_string(s; keywords...)`, where any number of the following boolean keywords
188+
`normalize(s; keywords...)`, where any number of the following boolean keywords
189189
options (which all default to `false` except for `compose`) are specified:
190190
191191
* `compose=false`: do not perform canonical composition
@@ -209,17 +209,17 @@ For example, NFKC corresponds to the options `compose=true, compat=true, stable=
209209
210210
# Examples
211211
```jldoctest
212-
julia> "μ" == normalize_string("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
212+
julia> "μ" == normalize("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
213213
true
214214
215-
julia> normalize_string("JuLiA", casefold=true)
215+
julia> normalize("JuLiA", casefold=true)
216216
"julia"
217217
218-
julia> normalize_string("JúLiA", stripmark=true)
218+
julia> normalize("JúLiA", stripmark=true)
219219
"JuLiA"
220220
```
221221
"""
222-
function normalize_string(s::AbstractString, nf::Symbol)
222+
function normalize(s::AbstractString, nf::Symbol)
223223
utf8proc_map(s, nf == :NFC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE) :
224224
nf == :NFD ? (UTF8PROC_STABLE | UTF8PROC_DECOMPOSE) :
225225
nf == :NFKC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE
@@ -275,20 +275,20 @@ category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UI
275275
category_string(c) = category_strings[category_code(c)+1]
276276

277277
"""
278-
is_assigned_char(c) -> Bool
278+
isassigned(c) -> Bool
279279
280280
Returns `true` if the given char or integer is an assigned Unicode code point.
281281
282282
# Examples
283283
```jldoctest
284-
julia> is_assigned_char(101)
284+
julia> isassigned(101)
285285
true
286286
287-
julia> is_assigned_char('\\x01')
287+
julia> isassigned('\\x01')
288288
true
289289
```
290290
"""
291-
is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
291+
isassigned(c) = category_code(c) != UTF8PROC_CATEGORY_CN
292292

293293
## libc character class predicates ##
294294

@@ -342,7 +342,7 @@ end
342342
"""
343343
isdigit(c::Char) -> Bool
344344
345-
Tests whether a character is a numeric digit (0-9).
345+
Tests whether a character is a decimal digit (0-9).
346346
347347
# Examples
348348
```jldoctest
@@ -380,25 +380,31 @@ false
380380
isalpha(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO)
381381

382382
"""
383-
isnumber(c::Char) -> Bool
383+
isnumeric(c::Char) -> Bool
384384
385385
Tests whether a character is numeric.
386386
A character is classified as numeric if it belongs to the Unicode general category Number,
387387
i.e. a character whose category code begins with 'N'.
388388
389+
Note that this broad category includes characters such as ¾ and ௰.
390+
Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9.
391+
389392
# Examples
390393
```jldoctest
391-
julia> isnumber('9')
394+
julia> isnumeric('௰')
395+
true
396+
397+
julia> isnumeric('9')
392398
true
393399
394-
julia> isnumber('α')
400+
julia> isnumeric('α')
395401
false
396402
397-
julia> isnumber('❤')
403+
julia> isnumeric('❤')
398404
false
399405
```
400406
"""
401-
isnumber(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
407+
isnumeric(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
402408

403409
"""
404410
isalnum(c::Char) -> Bool

doc/src/manual/faq.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli
617617

618618
Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference
619619
(variable, object field, or array element) is uninitialized, accessing it will immediately throw
620-
an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref)
621-
functions.
620+
an error. This situation can be detected using the [`isdefined`](@ref) or
621+
[`isassigned`](@ref Base.isassigned) functions.
622622

623623
Some functions are used only for their side effects, and do not need to return a value. In these
624624
cases, the convention is to return the value `nothing`, which is just a singleton object of type
@@ -627,7 +627,7 @@ this convention, and that the REPL does not print anything for it. Some language
627627
would not otherwise have a value also yield `nothing`, for example `if false; end`.
628628

629629
To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the
630-
[`missing`](@ref) object. See the [`Missing Values|](@ref missing) section for more details.
630+
[`missing`](@ref) object. See the [`Missing Values`](@ref missing) section for more details.
631631

632632
The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of
633633
as nothing but rather a tuple of zero values.

stdlib/Unicode/docs/src/index.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Unicode
22

33
```@docs
4-
Unicode.is_assigned_char
5-
Unicode.normalize_string
4+
Unicode.isassigned
5+
Unicode.normalize
66
Unicode.graphemes
77
Unicode.uppercase
88
Unicode.lowercase
@@ -16,7 +16,7 @@ Unicode.iscntrl
1616
Unicode.isdigit
1717
Unicode.isgraph
1818
Unicode.islower
19-
Unicode.isnumber
19+
Unicode.isnumeric
2020
Unicode.isprint
2121
Unicode.ispunct
2222
Unicode.isspace

stdlib/Unicode/src/Unicode.jl

+12-4
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,22 @@ __precompile__(true)
44

55
module Unicode
66

7-
using Base.UTF8proc: normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
8-
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
7+
using Base.UTF8proc: normalize, graphemes, isassigned, textwidth, isvalid,
8+
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
99
iscntrl, ispunct, isspace, isprint, isgraph,
1010
lowercase, uppercase, titlecase, lcfirst, ucfirst
1111

12-
export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
13-
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
12+
export normalize, graphemes, isassigned, textwidth, isvalid,
13+
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
1414
iscntrl, ispunct, isspace, isprint, isgraph,
1515
lowercase, uppercase, titlecase, lcfirst, ucfirst
1616

17+
# BEGIN 0.7 deprecations
18+
19+
@deprecate isnumber(c::Char) Unicode.isnumeric(c)
20+
@deprecate is_assigned_char(c::Char) Unicode.isassigned(c)
21+
@deprecate normalize_string(s::AbstractString, nf::Symbol; kwargs...) Unicode.normalize(s, nf; kwargs...)
22+
23+
# END 0.7 deprecations
24+
1725
end

0 commit comments

Comments
 (0)