Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move Unicode-related functions to new Unicode stdlib package #25021

Merged
merged 3 commits into from
Dec 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ The steps required to add a new docstring are listed below:
Examples written within docstrings can be used as test cases known as "doctests" by annotating code blocks with `jldoctest`.

```jldoctest
julia> uppercase("Docstring test")
julia> Unicode.uppercase("Docstring test")
"DOCSTRING TEST"
```

Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,16 @@ Deprecated or removed
* The `sum_kbn` and `cumsum_kbn` functions have been moved to the
[KahanSummation](https://github.com/JuliaMath/KahanSummation.jl) package ([#24869]).

* Unicode-related string functions have been moved to the new `Unicode` standard
library module ([#25021]). This applies to `normalize_string`, `graphemes`,
`is_assigned_char`, `textwidth`, `isascii`, `islower`, `isupper`, `isalpha`,
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.

* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
in favor of `isassigned`, and `normalize_string` in favor of `normalize`; all three
replacements are in the new `Unicode` standard library module ([#25021]).

Command-line option changes
---------------------------

Expand Down Expand Up @@ -1711,3 +1721,4 @@ Command-line option changes
[#24413]: https://github.com/JuliaLang/julia/issues/24413
[#24653]: https://github.com/JuliaLang/julia/issues/24653
[#24869]: https://github.com/JuliaLang/julia/issues/24869
[#25021]: https://github.com/JuliaLang/julia/issues/25021
2 changes: 1 addition & 1 deletion base/arrayshow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
screenwidth -= length(pre) + length(post)
presp = repeat(" ", length(pre)) # indent each row to match pre string
postsp = ""
@assert textwidth(hdots) == textwidth(ddots)
@assert Unicode.textwidth(hdots) == Unicode.textwidth(ddots)
sepsize = length(sep)
rowsA, colsA = indices(X,1), indices(X,2)
m, n = length(rowsA), length(colsA)
Expand Down
6 changes: 3 additions & 3 deletions base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function show(io::IO, c::Char)
return
end
end
if isprint(c)
if Unicode.isprint(c)
write(io, 0x27, c, 0x27)
else
u = UInt32(c)
Expand All @@ -81,6 +81,6 @@ end
function show(io::IO, ::MIME"text/plain", c::Char)
show(io, c)
u = UInt32(c)
print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
print(io, " (category ", UTF8proc.category_abbrev(c), ": ", UTF8proc.category_string(c), ")")
print(io, ": ", Unicode.isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
print(io, " (category ", Unicode.category_abbrev(c), ": ", Unicode.category_string(c), ")")
end
2 changes: 1 addition & 1 deletion base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString)
s = split(line, '*'; keep = false)
map!(strip, s, s)
if length(s) > 1
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
push!(machines,(s[2], cnt))
else
push!(machines,line)
Expand Down
31 changes: 23 additions & 8 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1072,13 +1072,6 @@ function Matrix()
return Matrix(uninitialized, 0, 0)
end

for name in ("alnum", "alpha", "cntrl", "digit", "number", "graph",
"lower", "print", "punct", "space", "upper", "xdigit")
f = Symbol("is",name)
@eval import .UTF8proc: $f
@eval @deprecate ($f)(s::AbstractString) all($f, s)
end

# TODO: remove warning for using `_` in parse_input_line in base/client.jl

# Special functions have been moved to a package
Expand Down Expand Up @@ -1512,7 +1505,7 @@ export hex2num
@deprecate convert(::Type{Symbol}, s::AbstractString) Symbol(s)
@deprecate convert(::Type{String}, s::Symbol) String(s)
@deprecate convert(::Type{String}, v::Vector{UInt8}) String(v)
@deprecate convert(::Type{S}, g::UTF8proc.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)
@deprecate convert(::Type{S}, g::Unicode.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)

# Issue #19923
@deprecate ror circshift
Expand Down Expand Up @@ -2186,6 +2179,28 @@ end
@deprecate_moved sum_kbn "KahanSummation"
@deprecate_moved cumsum_kbn "KahanSummation"

# PR #25021
@deprecate_moved normalize_string "Unicode" true true
@deprecate_moved graphemes "Unicode" true true
@deprecate_moved is_assigned_char "Unicode" true true
@deprecate_moved textwidth "Unicode" true true
@deprecate_moved islower "Unicode" true true
@deprecate_moved isupper "Unicode" true true
@deprecate_moved isalpha "Unicode" true true
@deprecate_moved isdigit "Unicode" true true
@deprecate_moved isnumber "Unicode" true true
@deprecate_moved isalnum "Unicode" true true
@deprecate_moved iscntrl "Unicode" true true
@deprecate_moved ispunct "Unicode" true true
@deprecate_moved isspace "Unicode" true true
@deprecate_moved isprint "Unicode" true true
@deprecate_moved isgraph "Unicode" true true
@deprecate_moved lowercase "Unicode" true true
@deprecate_moved uppercase "Unicode" true true
@deprecate_moved titlecase "Unicode" true true
@deprecate_moved lcfirst "Unicode" true true
@deprecate_moved ucfirst "Unicode" true true

# END 0.7 deprecations

# BEGIN 1.0 deprecations
Expand Down
4 changes: 2 additions & 2 deletions base/dict.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

function _truncate_at_width_or_chars(str, width, chars="", truncmark="…")
truncwidth = textwidth(truncmark)
truncwidth = Unicode.textwidth(truncmark)
(width <= 0 || width < truncwidth) && return ""

wid = truncidx = lastidx = 0
idx = start(str)
while !done(str, idx)
lastidx = idx
c, idx = next(str, idx)
wid += textwidth(c)
wid += Unicode.textwidth(c)
wid >= width - truncwidth && truncidx == 0 && (truncidx = lastidx)
(wid >= width || c in chars) && break
end
Expand Down
1 change: 1 addition & 0 deletions base/distributed/Distributed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
binding_module, notify_error, atexit, julia_exename, julia_cmd,
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
shell_escape_posixly, uv_error
using Base.Unicode: isascii, isdigit, isnumeric

# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use

Expand Down
4 changes: 3 additions & 1 deletion base/docs/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Text / HTML objects

import Base: print, show, ==, hash
using Base.Unicode

export HTML, @html_str

Expand Down Expand Up @@ -231,7 +232,8 @@ function matchinds(needle, haystack; acronym = false)
for (i, char) in enumerate(haystack)
isempty(chars) && break
while chars[1] == ' ' shift!(chars) end # skip spaces
if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc))
if Unicode.lowercase(char) == Unicode.lowercase(chars[1]) &&
(!acronym || !Unicode.isalpha(lastc))
push!(is, i)
shift!(chars)
end
Expand Down
22 changes: 0 additions & 22 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -725,40 +725,22 @@ export
eachmatch,
endswith,
escape_string,
graphemes,
hex,
hex2bytes,
hex2bytes!,
ind2chr,
info,
is_assigned_char,
isalnum,
isalpha,
isascii,
iscntrl,
isdigit,
isgraph,
islower,
ismatch,
isnumber,
isprint,
ispunct,
isspace,
isupper,
isvalid,
isxdigit,
join,
lcfirst,
logging,
lowercase,
lpad,
lstrip,
match,
matchall,
ncodeunits,
ndigits,
nextind,
normalize_string,
oct,
prevind,
print,
Expand All @@ -785,13 +767,9 @@ export
string,
strip,
summary,
textwidth,
thisind,
titlecase,
transcode,
ucfirst,
unescape_string,
uppercase,
warn,

# random numbers
Expand Down
2 changes: 1 addition & 1 deletion base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function edit(path::AbstractString, line::Integer=0)
cmd = line != 0 ? `$command $path -l $line` : `$command $path`
elseif startswith(name, "subl") || startswith(name, "atom")
cmd = line != 0 ? `$command $path:$line` : `$command $path`
elseif name == "code" || (Sys.iswindows() && uppercase(name) == "CODE.EXE")
elseif name == "code" || (Sys.iswindows() && Unicode.uppercase(name) == "CODE.EXE")
cmd = line != 0 ? `$command -g $path:$line` : `$command -g $path`
elseif startswith(name, "notepad++")
cmd = line != 0 ? `$command $path -n$line` : `$command $path`
Expand Down
4 changes: 3 additions & 1 deletion base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,8 @@ characters from that character until the start of the next line are ignored.
julia> buf = IOBuffer(" text")
IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=8, maxsize=Inf, ptr=1, mark=-1)

julia> using Unicode

julia> skipchars(buf, isspace)
IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=8, maxsize=Inf, ptr=5, mark=-1)

Expand Down Expand Up @@ -889,7 +891,7 @@ julia> countlines(io, '.')
```
"""
function countlines(io::IO, eol::Char='\n')
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
Unicode.isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
aeol = UInt8(eol)
a = Vector{UInt8}(uninitialized, 8192)
nl = 0
Expand Down
4 changes: 2 additions & 2 deletions base/libuv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ function uv_sizeof_req(req)
end

for h in uv_handle_types
@eval const $(Symbol("_sizeof_",lowercase(string(h)))) = uv_sizeof_handle($h)
@eval const $(Symbol("_sizeof_",Unicode.lowercase(string(h)))) = uv_sizeof_handle($h)
end
for r in uv_req_types
@eval const $(Symbol("_sizeof_",lowercase(string(r)))) = uv_sizeof_req($r)
@eval const $(Symbol("_sizeof_",Unicode.lowercase(string(r)))) = uv_sizeof_req($r)
end

uv_handle_data(handle) = ccall(:jl_uv_handle_data,Ptr{Void},(Ptr{Void},),handle)
Expand Down
4 changes: 2 additions & 2 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ elseif Sys.isapple()

# If there is no match, it's possible that the file does exist but HFS+
# performed unicode normalization. See https://developer.apple.com/library/mac/qa/qa1235/_index.html.
isascii(path_basename) && return false
Vector{UInt8}(normalize_string(path_basename, :NFD)) == casepreserved_basename
Unicode.isascii(path_basename) && return false
Vector{UInt8}(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
end
else
# Generic fallback that performs a slow directory listing.
Expand Down
1 change: 1 addition & 0 deletions base/markdown/Markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Markdown

import Base: show, ==
import Core: @doc_str
using Base.Unicode: lowercase, ucfirst, isspace

include(joinpath("parse", "config.jl"))
include(joinpath("parse", "util.jl"))
Expand Down
2 changes: 1 addition & 1 deletion base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ convert(::Type{BigFloat}, x::Union{Float16,Float32}) = BigFloat(Float64(x))
convert(::Type{BigFloat}, x::Rational) = BigFloat(numerator(x)) / BigFloat(denominator(x))

function tryparse(::Type{BigFloat}, s::AbstractString, base::Int=0)
!isempty(s) && isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base)
!isempty(s) && Base.Unicode.isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base)
z = BigFloat()
err = ccall((:mpfr_set_str, :libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, Int32), z, s, base, ROUNDING_MODE[])
err == 0 ? Nullable(z) : Nullable{BigFloat}()
Expand Down
2 changes: 2 additions & 0 deletions base/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,8 @@ entered in the Julia REPL (and most editors, appropriately configured) by typing

# Examples
```jldoctest
julia> using Unicode

julia> map(uppercase∘hex, 250:255)
6-element Array{String,1}:
"FA"
Expand Down
18 changes: 9 additions & 9 deletions base/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ end
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
c, i, j = parseint_next(s, startpos, endpos)

while isspace(c)
while Unicode.isspace(c)
c, i, j = parseint_next(s,i,endpos)
end
(j == 0) && (return 0, 0, 0)
Expand All @@ -66,7 +66,7 @@ function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos:
end
end

while isspace(c)
while Unicode.isspace(c)
c, i, j = parseint_next(s,i,endpos)
end
(j == 0) && (return 0, 0, 0)
Expand Down Expand Up @@ -125,10 +125,10 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::
return Nullable{T}(n)
end
c, i = next(s,i)
isspace(c) && break
Unicode.isspace(c) && break
end
(T <: Signed) && (n *= sgn)
while !isspace(c)
while !Unicode.isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
Expand All @@ -149,7 +149,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::
end
while i <= endpos
c, i = next(s,i)
if !isspace(c)
if !Unicode.isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
Expand All @@ -168,10 +168,10 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},
orig_end = endpos

# Ignore leading and trailing whitespace
while isspace(sbuff[startpos]) && startpos <= endpos
while Unicode.isspace(sbuff[startpos]) && startpos <= endpos
startpos = nextind(sbuff, startpos)
end
while isspace(sbuff[endpos]) && endpos >= startpos
while Unicode.isspace(sbuff[endpos]) && endpos >= startpos
endpos = prevind(sbuff, endpos)
end

Expand All @@ -186,7 +186,7 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},

if raise
substr = SubString(sbuff, orig_start, orig_end) # show input string in the error to avoid confusion
if all(isspace, substr)
if all(Unicode.isspace, substr)
throw(ArgumentError("input string only contains whitespace"))
else
throw(ArgumentError("invalid Bool representation: $(repr(substr))"))
Expand Down Expand Up @@ -243,7 +243,7 @@ tryparse_internal(::Type{Float16}, s::AbstractString, startpos::Int, endpos::Int

function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String}}, i::Int, e::Int, raise::Bool) where {T<:Real}
# skip initial whitespace
while i ≤ e && isspace(s[i])
while i ≤ e && Unicode.isspace(s[i])
i = nextind(s, i)
end
if i > e
Expand Down
4 changes: 2 additions & 2 deletions base/pkg/entry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function available()
for (pkg, vers) in all_avail
any(x->Types.satisfies("julia", VERSION, x[2].requires), vers) && push!(avail, pkg)
end
sort!(avail, by=lowercase)
sort!(avail, by=Base.Unicode.lowercase)
end

function available(pkg::AbstractString)
Expand Down Expand Up @@ -572,7 +572,7 @@ end

function warnbanner(msg...; label="[ WARNING ]", prefix="")
cols = Base.displaysize(STDERR)[2]
str = rpad(lpad(label, div(cols+textwidth(label), 2), "="), cols, "=")
str = rpad(lpad(label, div(cols+Base.Unicode.textwidth(label), 2), "="), cols, "=")
warn(prefix="", str)
println(STDERR)
warn(prefix=prefix, msg...)
Expand Down
2 changes: 1 addition & 1 deletion base/pkg/reqs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ function write(io::IO, lines::Vector{Line})
end
end
function write(io::IO, reqs::Requires)
for pkg in sort!(collect(keys(reqs)), by=lowercase)
for pkg in sort!(collect(keys(reqs)), by=Unicode.lowercase)
println(io, Requirement(pkg, reqs[pkg]).content)
end
end
Expand Down
Loading