-
Notifications
You must be signed in to change notification settings - Fork 305
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
encoding/ianaindex: add ASCII, document Index.Encoding
Index.Encoding returns a nil Encoding when the charset is valid but unsupported by the library. Document this behavior. Because of this, US-ASCII was seen as unsupported; register it as a regular encoding. The decoder replaces non-ASCII bytes with the Unicode replacement character. The encoder returns a RepertoireError when a non-ASCII rune is encountered.
Fixes golang/go#19421
Change-Id: I4c24ba2114a5012be88488e63aa6e57df955eb96
GitHub-Last-Rev: 418ee6d
GitHub-Pull-Request: #10
Reviewed-on: https://go-review.googlesource.com/c/text/+/212077
Reviewed-by: Daniel Martí <[email protected]>
Reviewed-by: Marcel van Lohuizen <[email protected]>
Run-TryBot: Daniel Martí <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
- Loading branch information
Showing
4 changed files
with
118 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Copyright 2019 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package ianaindex | ||
|
||
import ( | ||
"unicode" | ||
"unicode/utf8" | ||
|
||
"golang.org/x/text/encoding" | ||
"golang.org/x/text/encoding/internal" | ||
"golang.org/x/text/transform" | ||
"golang.org/x/text/encoding/internal/identifier" | ||
) | ||
|
||
type asciiDecoder struct { | ||
transform.NopResetter | ||
} | ||
|
||
func (d asciiDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||
for _, c := range src { | ||
if c > unicode.MaxASCII { | ||
r := unicode.ReplacementChar | ||
if nDst + utf8.RuneLen(r) > len(dst) { | ||
err = transform.ErrShortDst | ||
break | ||
} | ||
nDst += utf8.EncodeRune(dst[nDst:], r) | ||
nSrc++ | ||
continue | ||
} | ||
|
||
if nDst >= len(dst) { | ||
err = transform.ErrShortDst | ||
break | ||
} | ||
dst[nDst] = c | ||
nDst++ | ||
nSrc++ | ||
} | ||
return nDst, nSrc, err | ||
} | ||
|
||
type asciiEncoder struct { | ||
transform.NopResetter | ||
} | ||
|
||
func (d asciiEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||
for _, c := range src { | ||
if c > unicode.MaxASCII { | ||
err = internal.RepertoireError(encoding.ASCIISub) | ||
break | ||
} | ||
|
||
if nDst >= len(dst) { | ||
err = transform.ErrShortDst | ||
break | ||
} | ||
dst[nDst] = c | ||
nDst++ | ||
nSrc++ | ||
} | ||
return nDst, nSrc, err | ||
} | ||
|
||
var asciiEnc = &internal.Encoding{ | ||
Encoding: &internal.SimpleEncoding{ | ||
asciiDecoder{}, | ||
asciiEncoder{}, | ||
}, | ||
Name: "US-ASCII", | ||
MIB: identifier.ASCII, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright 2019 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package ianaindex | ||
|
||
import ( | ||
"unicode" | ||
"testing" | ||
|
||
"golang.org/x/text/encoding" | ||
) | ||
|
||
func TestASCIIDecoder(t *testing.T) { | ||
repl := string(unicode.ReplacementChar) | ||
input := "Comment Candide fut élevé dans un beau château" | ||
want := "Comment Candide fut " + repl + repl + "lev" + repl + repl + " dans un beau ch" + repl + repl + "teau" | ||
got, err := asciiEnc.NewDecoder().String(input) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %v", err) | ||
} | ||
if got != want { | ||
t.Fatalf("asciiEnc.NewDecoder().String() = %q, want %q", got, want) | ||
} | ||
} | ||
|
||
func TestASCIIEncoder(t *testing.T) { | ||
repl := string(encoding.ASCIISub) | ||
input := "Comment Candide fut élevé dans un beau château" | ||
want := "Comment Candide fut " + repl + "lev" + repl + " dans un beau ch" + repl + "teau" | ||
got, err := encoding.ReplaceUnsupported(asciiEnc.NewEncoder()).String(input) | ||
if err != nil { | ||
t.Fatalf("unexpected error: %v", err) | ||
} | ||
if got != want { | ||
t.Fatalf("asciiEnc.NewEncoder().String() = %q, want %q", got, want) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters