|
3 | 3 | # Various Unicode functionality from the utf8proc library
|
4 | 4 | module UTF8proc
|
5 | 5 |
|
6 |
| -import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase |
| 6 | +import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid |
7 | 7 |
|
8 |
| -export isgraphemebreak, category_code, category_abbrev, category_string |
9 |
| - |
10 |
| -# also exported by Base: |
11 |
| -export normalize_string, graphemes, is_assigned_char, textwidth, isvalid, |
12 |
| - islower, isupper, isalpha, isdigit, isnumber, isalnum, |
13 |
| - iscntrl, ispunct, isspace, isprint, isgraph |
| 8 | +export isgraphemebreak, category_code, category_abbrev, category_string, |
| 9 | + normalize_string, graphemes, is_assigned_char, textwidth, isascii, isvalid, |
| 10 | + islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum, |
| 11 | + iscntrl, ispunct, isspace, isprint, isgraph, |
| 12 | + lowercase, uppercase, titlecase, lcfirst, ucfirst |
14 | 13 |
|
15 | 14 | # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
|
16 | 15 |
|
@@ -533,6 +532,139 @@ true
|
533 | 532 | """
|
534 | 533 | isgraph(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_SO)
|
535 | 534 |
|
"""
    isascii(c::Union{Char,AbstractString}) -> Bool

Return `true` if the character `c` lies in the ASCII range. When given a string,
return `true` only if every character of the string is ASCII.

# Examples
```jldoctest
julia> isascii('a')
true

julia> isascii('α')
false

julia> isascii("abc")
true

julia> isascii("αβγ")
false
```
"""
function isascii(c::Char)
    # ASCII is everything strictly below code point 0x80.
    return c < Char(0x80)
end

function isascii(s::AbstractString)
    # Bail out on the first non-ASCII character; an empty string is ASCII.
    for ch in s
        isascii(ch) || return false
    end
    return true
end
| 558 | + |
"""
    isxdigit(c::Char) -> Bool

Return `true` if `c` is a hexadecimal digit, i.e. one of `0`-`9`, `a`-`f` or `A`-`F`.
The letter `x` (as used in the conventional `0x` prefix) is not a hexadecimal digit.

# Examples
```jldoctest
julia> isxdigit('a')
true

julia> isxdigit('x')
false
```
"""
function isxdigit(c::Char)
    '0' <= c <= '9' && return true
    'a' <= c <= 'f' && return true
    return 'A' <= c <= 'F'
end
| 575 | + |
| 576 | +## uppercase, lowercase, and titlecase transformations ## |
| 577 | + |
"""
    uppercase(s::AbstractString)

Convert every character of `s` to uppercase and return the resulting string.

# Examples
```jldoctest
julia> uppercase("Julia")
"JULIA"
```
"""
function uppercase(s::AbstractString)
    # Apply the per-character `uppercase` method to each character of `s`.
    return map(uppercase, s)
end
| 590 | + |
"""
    lowercase(s::AbstractString)

Convert every character of `s` to lowercase and return the resulting string.

# Examples
```jldoctest
julia> lowercase("STRINGS AND THINGS")
"strings and things"
```
"""
function lowercase(s::AbstractString)
    # Apply the per-character `lowercase` method to each character of `s`.
    return map(lowercase, s)
end
| 603 | + |
"""
    titlecase(s::AbstractString)

Return a copy of `s` in which the first character of every word is converted to
title case. A word starts at the beginning of the string and after any character
for which `isspace` is true; all other characters are copied unchanged.
See also [`ucfirst`](@ref) to capitalize only the first
character in `s`.

# Examples
```jldoctest
julia> titlecase("the julia programming language")
"The Julia Programming Language"
```
"""
function titlecase(s::AbstractString)
    out = IOBuffer()
    atwordstart = true
    for ch in s
        space = isspace(ch)
        # Only a non-space character that opens a word is converted.
        print(out, (!space && atwordstart) ? titlecase(ch) : ch)
        # The next character opens a word exactly when this one is whitespace.
        atwordstart = space
    end
    return String(take!(out))
end
| 631 | + |
"""
    ucfirst(s::AbstractString) -> String

Return `s` with the first character converted to uppercase
(technically "title case" for Unicode).
See also [`titlecase`](@ref) to capitalize the first character of
every word in `s`.

The result is always a `String`, whether or not the first character had to be
changed, so the return type does not depend on the contents of `s`.

# Examples
```jldoctest
julia> ucfirst("python")
"Python"
```
"""
function ucfirst(s::AbstractString)
    # Convert on every path so that e.g. a `SubString` input yields a `String`
    # both when it is modified and when it is returned as is.
    isempty(s) && return String(s)
    c = s[1]
    tc = titlecase(c)
    return c == tc ? String(s) : string(tc, s[nextind(s, 1):end])
end
| 652 | + |
"""
    lcfirst(s::AbstractString) -> String

Return `s` with the first character converted to lowercase.

The result is always a `String`, whether or not the first character had to be
changed, so the return type does not depend on the contents of `s`.

# Examples
```jldoctest
julia> lcfirst("Julia")
"julia"
```
"""
function lcfirst(s::AbstractString)
    # Convert on every path so that e.g. a `SubString` input yields a `String`
    # both when it is modified and when it is returned as is.
    isempty(s) && return String(s)
    c = s[1]
    lc = lowercase(c)
    # Comparing against the converted character avoids rebuilding the string
    # when the first character has no distinct lowercase form (digits, etc.).
    return c == lc ? String(s) : string(lc, s[nextind(s, 1):end])
end
| 667 | + |
536 | 668 | ############################################################################
|
537 | 669 | # iterators for grapheme segmentation
|
538 | 670 |
|
|
0 commit comments