Skip to content

Commit 66b5e9c

Browse files
committed
Move Unicode-related functions to new Unicode stdlib package
1 parent ff045af commit 66b5e9c

File tree

14 files changed

+208
-195
lines changed

14 files changed

+208
-195
lines changed

base/distributed/Distributed.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
1414
VERSION_STRING, sync_begin, sync_add, sync_end, async_run_thunk,
1515
binding_module, notify_error, atexit, julia_exename, julia_cmd,
1616
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
17-
shell_escape_posixly, uv_error
17+
shell_escape_posixly, uv_error, isascii
1818

1919
# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use
2020

base/exports.jl

-18
Original file line numberDiff line numberDiff line change
@@ -731,24 +731,10 @@ export
731731
hex2bytes!,
732732
ind2chr,
733733
info,
734-
is_assigned_char,
735-
isalnum,
736-
isalpha,
737734
isascii,
738-
iscntrl,
739-
isdigit,
740-
isgraph,
741-
islower,
742735
ismatch,
743-
isnumber,
744-
isprint,
745-
ispunct,
746-
isspace,
747-
isupper,
748736
isvalid,
749-
isxdigit,
750737
join,
751-
lcfirst,
752738
logging,
753739
lowercase,
754740
lpad,
@@ -758,7 +744,6 @@ export
758744
ncodeunits,
759745
ndigits,
760746
nextind,
761-
normalize_string,
762747
oct,
763748
prevind,
764749
print,
@@ -785,11 +770,8 @@ export
785770
string,
786771
strip,
787772
summary,
788-
textwidth,
789773
thisind,
790-
titlecase,
791774
transcode,
792-
ucfirst,
793775
unescape_string,
794776
uppercase,
795777
warn,

base/markdown/Markdown.jl

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ module Markdown
77

88
import Base: show, ==
99
import Core: @doc_str
10+
using Base: ucfirst
1011

1112
include(joinpath("parse", "config.jl"))
1213
include(joinpath("parse", "util.jl"))

base/repl/LineEdit.jl

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import ..Terminals: raw!, width, height, cmove, getX,
99

1010
import Base: ensureroom, peek, show, AnyDict, position
1111

12+
using Base: textwidth
13+
1214
abstract type TextInterface end
1315
abstract type ModeState end
1416

base/strings/basic.jl

-133
Original file line numberDiff line numberDiff line change
@@ -469,143 +469,10 @@ next(e::EachStringIndex, state) = (state, nextind(e.s, state))
469469
done(e::EachStringIndex, state) = done(e.s, state)
470470
eltype(::Type{EachStringIndex}) = Int
471471

472-
"""
473-
isascii(c::Union{Char,AbstractString}) -> Bool
474-
475-
Test whether a character belongs to the ASCII character set, or whether this is true for
476-
all elements of a string.
477-
478-
# Examples
479-
```jldoctest
480-
julia> isascii('a')
481-
true
482-
483-
julia> isascii('α')
484-
false
485-
486-
julia> isascii("abc")
487-
true
488-
489-
julia> isascii("αβγ")
490-
false
491-
```
492-
"""
493-
isascii(c::Char) = c < Char(0x80)
494-
isascii(s::AbstractString) = all(isascii, s)
495-
496472
## string promotion rules ##
497473

498474
promote_rule(::Type{<:AbstractString}, ::Type{<:AbstractString}) = String
499475

500-
"""
501-
isxdigit(c::Char) -> Bool
502-
503-
Test whether a character is a valid hexadecimal digit. Note that this does not
504-
include `x` (as in the standard `0x` prefix).
505-
506-
# Examples
507-
```jldoctest
508-
julia> isxdigit('a')
509-
true
510-
511-
julia> isxdigit('x')
512-
false
513-
```
514-
"""
515-
isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
516-
517-
## uppercase, lowercase, and titlecase transformations ##
518-
519-
"""
520-
uppercase(s::AbstractString)
521-
522-
Return `s` with all characters converted to uppercase.
523-
524-
# Examples
525-
```jldoctest
526-
julia> uppercase("Julia")
527-
"JULIA"
528-
```
529-
"""
530-
uppercase(s::AbstractString) = map(uppercase, s)
531-
532-
"""
533-
lowercase(s::AbstractString)
534-
535-
Return `s` with all characters converted to lowercase.
536-
537-
# Examples
538-
```jldoctest
539-
julia> lowercase("STRINGS AND THINGS")
540-
"strings and things"
541-
```
542-
"""
543-
lowercase(s::AbstractString) = map(lowercase, s)
544-
545-
"""
546-
titlecase(s::AbstractString)
547-
548-
Capitalize the first character of each word in `s`.
549-
See also [`ucfirst`](@ref) to capitalize only the first
550-
character in `s`.
551-
552-
# Examples
553-
```jldoctest
554-
julia> titlecase("the julia programming language")
555-
"The Julia Programming Language"
556-
```
557-
"""
558-
function titlecase(s::AbstractString)
559-
startword = true
560-
b = IOBuffer()
561-
for c in s
562-
if isspace(c)
563-
print(b, c)
564-
startword = true
565-
else
566-
print(b, startword ? titlecase(c) : c)
567-
startword = false
568-
end
569-
end
570-
return String(take!(b))
571-
end
572-
573-
"""
574-
ucfirst(s::AbstractString)
575-
576-
Return `string` with the first character converted to uppercase
577-
(technically "title case" for Unicode).
578-
See also [`titlecase`](@ref) to capitalize the first character of
579-
every word in `s`.
580-
581-
# Examples
582-
```jldoctest
583-
julia> ucfirst("python")
584-
"Python"
585-
```
586-
"""
587-
function ucfirst(s::AbstractString)
588-
isempty(s) && return s
589-
c = s[1]
590-
tc = titlecase(c)
591-
return c==tc ? s : string(tc,s[nextind(s,1):end])
592-
end
593-
594-
"""
595-
lcfirst(s::AbstractString)
596-
597-
Return `string` with the first character converted to lowercase.
598-
599-
# Examples
600-
```jldoctest
601-
julia> lcfirst("Julia")
602-
"julia"
603-
```
604-
"""
605-
function lcfirst(s::AbstractString)
606-
isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
607-
end
608-
609476
## string map, filter, has ##
610477

611478
function map(f, s::AbstractString)

base/strings/utf8proc.jl

+139-7
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
# Various Unicode functionality from the utf8proc library
44
module UTF8proc
55

6-
import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase
6+
import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid
77

8-
export isgraphemebreak, category_code, category_abbrev, category_string
9-
10-
# also exported by Base:
11-
export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
12-
islower, isupper, isalpha, isdigit, isnumber, isalnum,
13-
iscntrl, ispunct, isspace, isprint, isgraph
8+
export isgraphemebreak, category_code, category_abbrev, category_string,
9+
normalize_string, graphemes, is_assigned_char, textwidth, isascii, isvalid,
10+
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
11+
iscntrl, ispunct, isspace, isprint, isgraph,
12+
lowercase, uppercase, titlecase, lcfirst, ucfirst
1413

1514
# whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
1615

@@ -533,6 +532,139 @@ true
533532
"""
534533
isgraph(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_SO)
535534

535+
"""
536+
isascii(c::Union{Char,AbstractString}) -> Bool
537+
538+
Test whether a character belongs to the ASCII character set, or whether this is true for
539+
all elements of a string.
540+
541+
# Examples
542+
```jldoctest
543+
julia> isascii('a')
544+
true
545+
546+
julia> isascii('α')
547+
false
548+
549+
julia> isascii("abc")
550+
true
551+
552+
julia> isascii("αβγ")
553+
false
554+
```
555+
"""
556+
isascii(c::Char) = c < Char(0x80)
557+
isascii(s::AbstractString) = all(isascii, s)
558+
559+
"""
560+
isxdigit(c::Char) -> Bool
561+
562+
Test whether a character is a valid hexadecimal digit. Note that this does not
563+
include `x` (as in the standard `0x` prefix).
564+
565+
# Examples
566+
```jldoctest
567+
julia> isxdigit('a')
568+
true
569+
570+
julia> isxdigit('x')
571+
false
572+
```
573+
"""
574+
isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
575+
576+
## uppercase, lowercase, and titlecase transformations ##
577+
578+
"""
579+
uppercase(s::AbstractString)
580+
581+
Return `s` with all characters converted to uppercase.
582+
583+
# Examples
584+
```jldoctest
585+
julia> uppercase("Julia")
586+
"JULIA"
587+
```
588+
"""
589+
uppercase(s::AbstractString) = map(uppercase, s)
590+
591+
"""
592+
lowercase(s::AbstractString)
593+
594+
Return `s` with all characters converted to lowercase.
595+
596+
# Examples
597+
```jldoctest
598+
julia> lowercase("STRINGS AND THINGS")
599+
"strings and things"
600+
```
601+
"""
602+
lowercase(s::AbstractString) = map(lowercase, s)
603+
604+
"""
605+
titlecase(s::AbstractString)
606+
607+
Capitalize the first character of each word in `s`.
608+
See also [`ucfirst`](@ref) to capitalize only the first
609+
character in `s`.
610+
611+
# Examples
612+
```jldoctest
613+
julia> titlecase("the julia programming language")
614+
"The Julia Programming Language"
615+
```
616+
"""
617+
function titlecase(s::AbstractString)
618+
startword = true
619+
b = IOBuffer()
620+
for c in s
621+
if isspace(c)
622+
print(b, c)
623+
startword = true
624+
else
625+
print(b, startword ? titlecase(c) : c)
626+
startword = false
627+
end
628+
end
629+
return String(take!(b))
630+
end
631+
632+
"""
633+
ucfirst(s::AbstractString)
634+
635+
Return `s` with the first character converted to uppercase
636+
(technically "title case" for Unicode).
637+
See also [`titlecase`](@ref) to capitalize the first character of
638+
every word in `s`.
639+
640+
# Examples
641+
```jldoctest
642+
julia> ucfirst("python")
643+
"Python"
644+
```
645+
"""
646+
function ucfirst(s::AbstractString)
647+
isempty(s) && return s
648+
c = s[1]
649+
tc = titlecase(c)
650+
return c==tc ? s : string(tc,s[nextind(s,1):end])
651+
end
652+
653+
"""
654+
lcfirst(s::AbstractString)
655+
656+
Return `s` with the first character converted to lowercase.
657+
658+
# Examples
659+
```jldoctest
660+
julia> lcfirst("Julia")
661+
"julia"
662+
```
663+
"""
664+
function lcfirst(s::AbstractString)
665+
isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
666+
end
667+
536668
############################################################################
537669
# iterators for grapheme segmentation
538670

base/sysimg.jl

+1
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,7 @@ Base.require(:Profile)
485485
Base.require(:SharedArrays)
486486
Base.require(:SuiteSparse)
487487
Base.require(:Test)
488+
Base.require(:Unicode)
488489

489490
@eval Base begin
490491
@deprecate_binding Test root_module(:Test) true ", run `using Test` instead"

0 commit comments

Comments
 (0)