mirror of https://github.com/docker/cli.git
Refactor stringutils and fix docker search output form when the description has CJK character
Signed-off-by: Chao Wang <wangchao.fnst@cn.fujitsu.com>
This commit is contained in:
parent
6ef0ea82ea
commit
926b20fcb5
|
@ -10,7 +10,6 @@ import (
|
|||
"github.com/docker/distribution/reference"
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
"github.com/docker/go-units"
|
||||
)
|
||||
|
||||
|
@ -165,7 +164,7 @@ func (c *containerContext) Image() string {
|
|||
func (c *containerContext) Command() string {
|
||||
command := c.c.Command
|
||||
if c.trunc {
|
||||
command = stringutils.Ellipsis(command, 20)
|
||||
command = Ellipsis(command, 20)
|
||||
}
|
||||
return strconv.Quote(command)
|
||||
}
|
||||
|
@ -227,7 +226,7 @@ func (c *containerContext) Mounts() string {
|
|||
name = m.Name
|
||||
}
|
||||
if c.trunc {
|
||||
name = stringutils.Ellipsis(name, 15)
|
||||
name = Ellipsis(name, 15)
|
||||
}
|
||||
mounts = append(mounts, name)
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@ func TestContainerPsContext(t *testing.T) {
|
|||
Source: "/a/path",
|
||||
},
|
||||
},
|
||||
}, true, "this-is-a-lo...", ctx.Mounts},
|
||||
}, true, "this-is-a-long…", ctx.Mounts},
|
||||
{types.Container{
|
||||
Mounts: []types.MountPoint{
|
||||
{
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
package formatter
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/width"
|
||||
)
|
||||
|
||||
// charWidth returns the number of horizontal positions a character occupies,
|
||||
// and is used to account for wide characters when displaying strings.
|
||||
//
|
||||
// In a broad sense, wide characters include East Asian Wide, East Asian Full-width,
|
||||
// (when not in East Asian context) see http://unicode.org/reports/tr11/.
|
||||
func charWidth(r rune) int {
|
||||
switch width.LookupRune(r).Kind() {
|
||||
case width.EastAsianWide, width.EastAsianFullwidth:
|
||||
return 2
|
||||
default:
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
// Ellipsis truncates a string to fit within maxDisplayWidth, and appends ellipsis (…).
|
||||
// For maxDisplayWidth of 1 and lower, no ellipsis is appended.
|
||||
// For maxDisplayWidth of 1, first char of string will return even if its width > 1.
|
||||
func Ellipsis(s string, maxDisplayWidth int) string {
|
||||
if maxDisplayWidth <= 0 {
|
||||
return ""
|
||||
}
|
||||
rs := []rune(s)
|
||||
if maxDisplayWidth == 1 {
|
||||
return string(rs[0])
|
||||
}
|
||||
|
||||
byteLen := len(s)
|
||||
if byteLen == utf8.RuneCountInString(s) {
|
||||
if byteLen <= maxDisplayWidth {
|
||||
return s
|
||||
}
|
||||
return string(rs[:maxDisplayWidth-1]) + "…"
|
||||
}
|
||||
|
||||
var (
|
||||
display []int
|
||||
displayWidth int
|
||||
)
|
||||
for _, r := range rs {
|
||||
cw := charWidth(r)
|
||||
displayWidth += cw
|
||||
display = append(display, displayWidth)
|
||||
}
|
||||
if displayWidth <= maxDisplayWidth {
|
||||
return s
|
||||
}
|
||||
for i := range display {
|
||||
if display[i] <= maxDisplayWidth-1 && display[i+1] > maxDisplayWidth-1 {
|
||||
return string(rs[:i+1]) + "…"
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package formatter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestEllipsis(t *testing.T) {
|
||||
var testcases = []struct {
|
||||
source string
|
||||
width int
|
||||
expected string
|
||||
}{
|
||||
{source: "t🐳ststring", width: 0, expected: ""},
|
||||
{source: "t🐳ststring", width: 1, expected: "t"},
|
||||
{source: "t🐳ststring", width: 2, expected: "t…"},
|
||||
{source: "t🐳ststring", width: 6, expected: "t🐳st…"},
|
||||
{source: "t🐳ststring", width: 20, expected: "t🐳ststring"},
|
||||
{source: "你好世界teststring", width: 0, expected: ""},
|
||||
{source: "你好世界teststring", width: 1, expected: "你"},
|
||||
{source: "你好世界teststring", width: 3, expected: "你…"},
|
||||
{source: "你好世界teststring", width: 6, expected: "你好…"},
|
||||
{source: "你好世界teststring", width: 20, expected: "你好世界teststring"},
|
||||
}
|
||||
|
||||
for _, testcase := range testcases {
|
||||
assert.Equal(t, testcase.expected, Ellipsis(testcase.source, testcase.width))
|
||||
}
|
||||
}
|
|
@ -7,7 +7,6 @@ import (
|
|||
|
||||
"github.com/docker/docker/api/types/image"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
units "github.com/docker/go-units"
|
||||
)
|
||||
|
||||
|
@ -93,7 +92,7 @@ func (c *historyContext) CreatedSince() string {
|
|||
func (c *historyContext) CreatedBy() string {
|
||||
createdBy := strings.Replace(c.h.CreatedBy, "\t", " ", -1)
|
||||
if c.trunc {
|
||||
return stringutils.Ellipsis(createdBy, 45)
|
||||
return Ellipsis(createdBy, 45)
|
||||
}
|
||||
return createdBy
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@ import (
|
|||
|
||||
"github.com/docker/docker/api/types/image"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
|
@ -96,7 +95,7 @@ func TestHistoryContext_CreatedBy(t *testing.T) {
|
|||
historyContext{
|
||||
h: image.HistoryResponseItem{CreatedBy: withTabs},
|
||||
trunc: true,
|
||||
}, stringutils.Ellipsis(expected, 45), ctx.CreatedBy,
|
||||
}, Ellipsis(expected, 45), ctx.CreatedBy,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -191,7 +190,7 @@ imageID3 24 hours ago /bin/bash ls
|
|||
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
||||
`
|
||||
expectedTrunc := `IMAGE CREATED CREATED BY SIZE COMMENT
|
||||
imageID1 24 hours ago /bin/bash ls && npm i && npm run test && k... 183MB Hi
|
||||
imageID1 24 hours ago /bin/bash ls && npm i && npm run test && kar… 183MB Hi
|
||||
imageID2 24 hours ago /bin/bash echo 183MB Hi
|
||||
imageID3 24 hours ago /bin/bash ls 183MB Hi
|
||||
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -80,7 +79,7 @@ func (c *pluginContext) Description() string {
|
|||
desc := strings.Replace(c.p.Config.Description, "\n", "", -1)
|
||||
desc = strings.Replace(desc, "\r", "", -1)
|
||||
if c.trunc {
|
||||
desc = stringutils.Ellipsis(desc, 45)
|
||||
desc = Ellipsis(desc, 45)
|
||||
}
|
||||
|
||||
return desc
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
"strings"
|
||||
|
||||
registry "github.com/docker/docker/api/types/registry"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -73,7 +72,7 @@ func (c *searchContext) Description() string {
|
|||
desc := strings.Replace(c.s.Description, "\n", " ", -1)
|
||||
desc = strings.Replace(desc, "\r", " ", -1)
|
||||
if c.trunc {
|
||||
desc = stringutils.Ellipsis(desc, 45)
|
||||
desc = Ellipsis(desc, 45)
|
||||
}
|
||||
return desc
|
||||
}
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
"testing"
|
||||
|
||||
registrytypes "github.com/docker/docker/api/types/registry"
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
"github.com/gotestyourself/gotestyourself/golden"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
@ -79,7 +78,7 @@ func TestSearchContextDescription(t *testing.T) {
|
|||
{searchContext{
|
||||
s: registrytypes.SearchResult{Description: longDescription},
|
||||
trunc: true,
|
||||
}, stringutils.Ellipsis(longDescription, 45), ctx.Description},
|
||||
}, Ellipsis(longDescription, 45), ctx.Description},
|
||||
{searchContext{
|
||||
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
||||
trunc: false,
|
||||
|
@ -87,7 +86,7 @@ func TestSearchContextDescription(t *testing.T) {
|
|||
{searchContext{
|
||||
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
||||
trunc: true,
|
||||
}, stringutils.Ellipsis(longDescription, 45), ctx.Description},
|
||||
}, Ellipsis(longDescription, 45), ctx.Description},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
|
|
|
@ -48,7 +48,7 @@ golang.org/x/crypto 558b6879de74bc843225cde5686419267ff707ca
|
|||
golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
|
||||
golang.org/x/sync 450f422ab23cf9881c94e2db30cac0eb1b7cf80c
|
||||
golang.org/x/sys 07c182904dbd53199946ba614a412c61d3c548f5
|
||||
golang.org/x/text f72d8390a633d5dfb0cc84043294db9f6c935756
|
||||
golang.org/x/text 825fc78a2fd6fa0a5447e300189e3219e05e1f25
|
||||
golang.org/x/time a4bde12657593d5e90d0533a3e4fd95e635124cb
|
||||
google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944
|
||||
google.golang.org/grpc v1.3.0
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
This package provides helper functions for dealing with strings
|
|
@ -1,99 +0,0 @@
|
|||
// Package stringutils provides helper functions for dealing with strings.
|
||||
package stringutils
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GenerateRandomAlphaOnlyString generates an alphabetical random string with length n.
|
||||
func GenerateRandomAlphaOnlyString(n int) string {
|
||||
// make a really long string
|
||||
letters := []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
b := make([]byte, n)
|
||||
for i := range b {
|
||||
b[i] = letters[rand.Intn(len(letters))]
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// GenerateRandomASCIIString generates an ASCII random string with length n.
|
||||
func GenerateRandomASCIIString(n int) string {
|
||||
chars := "abcdefghijklmnopqrstuvwxyz" +
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
|
||||
"~!@#$%^&*()-_+={}[]\\|<,>.?/\"';:` "
|
||||
res := make([]byte, n)
|
||||
for i := 0; i < n; i++ {
|
||||
res[i] = chars[rand.Intn(len(chars))]
|
||||
}
|
||||
return string(res)
|
||||
}
|
||||
|
||||
// Ellipsis truncates a string to fit within maxlen, and appends ellipsis (...).
|
||||
// For maxlen of 3 and lower, no ellipsis is appended.
|
||||
func Ellipsis(s string, maxlen int) string {
|
||||
r := []rune(s)
|
||||
if len(r) <= maxlen {
|
||||
return s
|
||||
}
|
||||
if maxlen <= 3 {
|
||||
return string(r[:maxlen])
|
||||
}
|
||||
return string(r[:maxlen-3]) + "..."
|
||||
}
|
||||
|
||||
// Truncate truncates a string to maxlen.
|
||||
func Truncate(s string, maxlen int) string {
|
||||
r := []rune(s)
|
||||
if len(r) <= maxlen {
|
||||
return s
|
||||
}
|
||||
return string(r[:maxlen])
|
||||
}
|
||||
|
||||
// InSlice tests whether a string is contained in a slice of strings or not.
|
||||
// Comparison is case insensitive
|
||||
func InSlice(slice []string, s string) bool {
|
||||
for _, ss := range slice {
|
||||
if strings.ToLower(s) == strings.ToLower(ss) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func quote(word string, buf *bytes.Buffer) {
|
||||
// Bail out early for "simple" strings
|
||||
if word != "" && !strings.ContainsAny(word, "\\'\"`${[|&;<>()~*?! \t\n") {
|
||||
buf.WriteString(word)
|
||||
return
|
||||
}
|
||||
|
||||
buf.WriteString("'")
|
||||
|
||||
for i := 0; i < len(word); i++ {
|
||||
b := word[i]
|
||||
if b == '\'' {
|
||||
// Replace literal ' with a close ', a \', and an open '
|
||||
buf.WriteString("'\\''")
|
||||
} else {
|
||||
buf.WriteByte(b)
|
||||
}
|
||||
}
|
||||
|
||||
buf.WriteString("'")
|
||||
}
|
||||
|
||||
// ShellQuoteArguments takes a list of strings and escapes them so they will be
|
||||
// handled right when passed as arguments to a program via a shell
|
||||
func ShellQuoteArguments(args []string) string {
|
||||
var buf bytes.Buffer
|
||||
for i, arg := range args {
|
||||
if i != 0 {
|
||||
buf.WriteByte(' ')
|
||||
}
|
||||
quote(arg, &buf)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
This repository holds supplementary Go libraries for text processing, many involving Unicode.
|
||||
|
||||
To submit changes to this repository, see http://golang.org/doc/contribute.html.
|
||||
|
||||
To generate the tables in this repository (except for the encoding tables),
|
||||
run go generate from this directory. By default tables are generated for the
|
||||
Unicode version in core and the CLDR version defined in
|
||||
golang.org/x/text/unicode/cldr.
|
||||
|
||||
Running go generate will as a side effect create a DATA subdirectory in this
|
||||
directory which holds all files that are used as a source for generating the
|
||||
tables. This directory will also serve as a cache.
|
||||
|
||||
Run
|
||||
|
||||
go test ./...
|
||||
|
||||
from this directory to run all tests. Add the "-tags icu" flag to also run
|
||||
ICU conformance tests (if available). This requires that you have the correct
|
||||
ICU version installed on your system.
|
||||
|
||||
TODO:
|
||||
- updating unversioned source files.
|
|
@ -0,0 +1,91 @@
|
|||
# Go Text
|
||||
|
||||
This repository holds supplementary Go libraries for text processing, many involving Unicode.
|
||||
|
||||
## Semantic Versioning
|
||||
This repo uses Semantic versioning (http://semver.org/), so
|
||||
1. MAJOR version when you make incompatible API changes,
|
||||
1. MINOR version when you add functionality in a backwards-compatible manner,
|
||||
and
|
||||
1. PATCH version when you make backwards-compatible bug fixes.
|
||||
|
||||
A Unicode major and minor version bump is mapped to a major version bump in
|
||||
x/text.
|
||||
A path version bump in Unicode is mapped to a minor version bump in x/text.
|
||||
Note that, consistent with the definitions in semver, until version 1.0.0 of
|
||||
x/text is reached, the minor version is considered a major version.
|
||||
So going from 0.1.0 to 0.2.0 is considered to be a major version bump.
|
||||
|
||||
A major new CLDR version is mapped to a minor version increase in x/text.
|
||||
Any other new CLDR version is mapped to a patch version increase in x/text.
|
||||
|
||||
## Download/Install
|
||||
|
||||
The easiest way to install is to run `go get -u golang.org/x/text`. You can
|
||||
also manually git clone the repository to `$GOPATH/src/golang.org/x/text`.
|
||||
|
||||
## Contribute
|
||||
To submit changes to this repository, see http://golang.org/doc/contribute.html.
|
||||
|
||||
To generate the tables in this repository (except for the encoding tables),
|
||||
run go generate from this directory. By default tables are generated for the
|
||||
Unicode version in core and the CLDR version defined in
|
||||
golang.org/x/text/unicode/cldr.
|
||||
|
||||
Running go generate will as a side effect create a DATA subdirectory in this
|
||||
directory, which holds all files that are used as a source for generating the
|
||||
tables. This directory will also serve as a cache.
|
||||
|
||||
## Testing
|
||||
Run
|
||||
|
||||
go test ./...
|
||||
|
||||
from this directory to run all tests. Add the "-tags icu" flag to also run
|
||||
ICU conformance tests (if available). This requires that you have the correct
|
||||
ICU version installed on your system.
|
||||
|
||||
TODO:
|
||||
- updating unversioned source files.
|
||||
|
||||
## Generating Tables
|
||||
|
||||
To generate the tables in this repository (except for the encoding
|
||||
tables), run `go generate` from this directory. By default tables are
|
||||
generated for the Unicode version in core and the CLDR version defined in
|
||||
golang.org/x/text/unicode/cldr.
|
||||
|
||||
Running go generate will as a side effect create a DATA subdirectory in this
|
||||
directory which holds all files that are used as a source for generating the
|
||||
tables. This directory will also serve as a cache.
|
||||
|
||||
## Versions
|
||||
To update a Unicode version run
|
||||
|
||||
UNICODE_VERSION=x.x.x go generate
|
||||
|
||||
where `x.x.x` must correspond to a directory in http://www.unicode.org/Public/.
|
||||
If this version is newer than the version in core it will also update the
|
||||
relevant packages there. The idna package in x/net will always be updated.
|
||||
|
||||
To update a CLDR version run
|
||||
|
||||
CLDR_VERSION=version go generate
|
||||
|
||||
where `version` must correspond to a directory in
|
||||
http://www.unicode.org/Public/cldr/.
|
||||
|
||||
Note that the code gets adapted over time to changes in the data and that
|
||||
backwards compatibility is not maintained.
|
||||
So updating to a different version may not work.
|
||||
|
||||
The files in DATA/{iana|icu|w3|whatwg} are currently not versioned.
|
||||
|
||||
## Report Issues / Send Patches
|
||||
|
||||
This repository uses Gerrit for code changes. To learn how to submit changes to
|
||||
this repository, see https://golang.org/doc/contribute.html.
|
||||
|
||||
The main issue tracker for the image repository is located at
|
||||
https://github.com/golang/go/issues. Prefix your issue with "x/image:" in the
|
||||
subject line, so it is easy to find.
|
|
@ -155,6 +155,7 @@ func DirectionString(s string) bidi.Direction {
|
|||
e, sz := bidi.LookupString(s[i:])
|
||||
if sz == 0 {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
c := e.Class()
|
||||
if c == bidi.R || c == bidi.AL || c == bidi.AN {
|
||||
|
@ -203,9 +204,6 @@ func (t *Transformer) isRTL() bool {
|
|||
}
|
||||
|
||||
func (t *Transformer) isFinal() bool {
|
||||
if !t.isRTL() {
|
||||
return true
|
||||
}
|
||||
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
|
||||
}
|
||||
|
||||
|
|
|
@ -726,7 +726,7 @@ loop:
|
|||
continue loop
|
||||
}
|
||||
}
|
||||
log.Panicf("invalid bidi code %s present in assertOnly at position %d", t, s.indexes[i])
|
||||
log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
// This file was generated by go generate; DO NOT EDIT
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
|
|
|
@ -33,17 +33,9 @@ const (
|
|||
// streamSafe implements the policy of when a CGJ should be inserted.
|
||||
type streamSafe uint8
|
||||
|
||||
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
|
||||
// first on it.
|
||||
func mkStreamSafe(p Properties) streamSafe {
|
||||
return streamSafe(p.nTrailingNonStarters())
|
||||
}
|
||||
|
||||
// first inserts the first rune of a segment.
|
||||
// first inserts the first rune of a segment. It is a faster version of next if
|
||||
// it is known p represents the first rune in a segment.
|
||||
func (ss *streamSafe) first(p Properties) {
|
||||
if *ss != 0 {
|
||||
panic("!= 0")
|
||||
}
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
}
|
||||
|
||||
|
@ -66,7 +58,7 @@ func (ss *streamSafe) next(p Properties) ssState {
|
|||
// be a non-starter. Note that it always hold that if nLead > 0 then
|
||||
// nLead == nTrail.
|
||||
if n == 0 {
|
||||
*ss = 0
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
return ssStarter
|
||||
}
|
||||
return ssSuccess
|
||||
|
@ -142,7 +134,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
|
|||
func (rb *reorderBuffer) reset() {
|
||||
rb.nrune = 0
|
||||
rb.nbyte = 0
|
||||
rb.ss = 0
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) doFlush() bool {
|
||||
|
@ -257,6 +248,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
|
|||
// It flushes the buffer on each new segment start.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
|
||||
rb.tmpBytes.setBytes(dcomp)
|
||||
// As the streamSafe accounting already handles the counting for modifiers,
|
||||
// we don't have to call next. However, we do need to keep the accounting
|
||||
// intact when flushing the buffer.
|
||||
for i := 0; i < len(dcomp); {
|
||||
info := rb.f.info(rb.tmpBytes, i)
|
||||
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
|
||||
|
|
|
@ -10,7 +10,7 @@ package norm
|
|||
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||
// bits
|
||||
// 15: 1 (inverse of NFD_QD bit of qcInfo)
|
||||
// 15: 1 (inverse of NFD_QC bit of qcInfo)
|
||||
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
|
||||
// 6..0: ccc (compressed CCC value).
|
||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||
|
@ -56,28 +56,31 @@ type formInfo struct {
|
|||
nextMain iterFunc
|
||||
}
|
||||
|
||||
var formTable []*formInfo
|
||||
|
||||
func init() {
|
||||
formTable = make([]*formInfo, 4)
|
||||
|
||||
for i := range formTable {
|
||||
f := &formInfo{}
|
||||
formTable[i] = f
|
||||
f.form = Form(i)
|
||||
if Form(i) == NFKD || Form(i) == NFKC {
|
||||
f.compatibility = true
|
||||
f.info = lookupInfoNFKC
|
||||
} else {
|
||||
f.info = lookupInfoNFC
|
||||
}
|
||||
f.nextMain = nextDecomposed
|
||||
if Form(i) == NFC || Form(i) == NFKC {
|
||||
f.nextMain = nextComposed
|
||||
f.composing = true
|
||||
}
|
||||
}
|
||||
}
|
||||
var formTable = []*formInfo{{
|
||||
form: NFC,
|
||||
composing: true,
|
||||
compatibility: false,
|
||||
info: lookupInfoNFC,
|
||||
nextMain: nextComposed,
|
||||
}, {
|
||||
form: NFD,
|
||||
composing: false,
|
||||
compatibility: false,
|
||||
info: lookupInfoNFC,
|
||||
nextMain: nextDecomposed,
|
||||
}, {
|
||||
form: NFKC,
|
||||
composing: true,
|
||||
compatibility: true,
|
||||
info: lookupInfoNFKC,
|
||||
nextMain: nextComposed,
|
||||
}, {
|
||||
form: NFKD,
|
||||
composing: false,
|
||||
compatibility: true,
|
||||
info: lookupInfoNFKC,
|
||||
nextMain: nextDecomposed,
|
||||
}}
|
||||
|
||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||
|
|
|
@ -90,16 +90,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
|
|||
}
|
||||
|
||||
func (in *input) hangul(p int) (r rune) {
|
||||
var size int
|
||||
if in.bytes == nil {
|
||||
if !isHangulString(in.str[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRuneInString(in.str[p:])
|
||||
r, size = utf8.DecodeRuneInString(in.str[p:])
|
||||
} else {
|
||||
if !isHangul(in.bytes[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRune(in.bytes[p:])
|
||||
r, size = utf8.DecodeRune(in.bytes[p:])
|
||||
}
|
||||
if size != hangulUTF8Size {
|
||||
return 0
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ func (i *Iter) Init(f Form, src []byte) {
|
|||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIBytes
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||
|
@ -56,11 +57,12 @@ func (i *Iter) InitString(f Form, src string) {
|
|||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIString
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// Seek sets the segment to be returned by the next call to Next to start
|
||||
// at position p. It is the responsibility of the caller to set p to the
|
||||
// start of a UTF8 rune.
|
||||
// start of a segment.
|
||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||
var abs int64
|
||||
switch whence {
|
||||
|
@ -84,6 +86,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
|||
i.multiSeg = nil
|
||||
i.next = i.rb.f.nextMain
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
|
@ -161,6 +164,7 @@ func nextHangul(i *Iter) []byte {
|
|||
if next >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
} else if i.rb.src.hangul(next) == 0 {
|
||||
i.rb.ss.next(i.info)
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
|
@ -204,12 +208,10 @@ func nextMultiNorm(i *Iter) []byte {
|
|||
if info.BoundaryBefore() {
|
||||
i.rb.compose()
|
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
i.rb.ss.first(info)
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
i.multiSeg = d[j+int(info.size):]
|
||||
return seg
|
||||
}
|
||||
i.rb.ss.next(info)
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
j += int(info.size)
|
||||
}
|
||||
|
@ -222,9 +224,9 @@ func nextMultiNorm(i *Iter) []byte {
|
|||
func nextDecomposed(i *Iter) (next []byte) {
|
||||
outp := 0
|
||||
inCopyStart, outCopyStart := i.p, 0
|
||||
ss := mkStreamSafe(i.info)
|
||||
for {
|
||||
if sz := int(i.info.size); sz <= 1 {
|
||||
i.rb.ss = 0
|
||||
p := i.p
|
||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||
if i.p >= i.rb.nsrc {
|
||||
|
@ -243,6 +245,8 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
p := outp + len(d)
|
||||
if outp > 0 {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
// TODO: this condition should not be possible, but we leave it
|
||||
// in for defensive purposes.
|
||||
if p > len(i.buf) {
|
||||
return i.buf[:outp]
|
||||
}
|
||||
|
@ -266,7 +270,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
} else {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
switch ss.next(i.info) {
|
||||
switch i.rb.ss.next(i.info) {
|
||||
case ssOverflow:
|
||||
i.next = nextCGJDecompose
|
||||
fallthrough
|
||||
|
@ -309,7 +313,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
}
|
||||
prevCC := i.info.tccc
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := ss.next(i.info); v == ssStarter {
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
|
@ -335,10 +339,6 @@ doNorm:
|
|||
|
||||
func doNormDecomposed(i *Iter) []byte {
|
||||
for {
|
||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
|
@ -348,6 +348,10 @@ func doNormDecomposed(i *Iter) []byte {
|
|||
if i.info.ccc == 0 {
|
||||
break
|
||||
}
|
||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
}
|
||||
// new segment or too many combining characters: exit normalization
|
||||
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
|
@ -357,6 +361,7 @@ func nextCGJDecompose(i *Iter) []byte {
|
|||
i.rb.ss = 0
|
||||
i.rb.insertCGJ()
|
||||
i.next = nextDecomposed
|
||||
i.rb.ss.first(i.info)
|
||||
buf := doNormDecomposed(i)
|
||||
return buf
|
||||
}
|
||||
|
@ -365,7 +370,6 @@ func nextCGJDecompose(i *Iter) []byte {
|
|||
func nextComposed(i *Iter) []byte {
|
||||
outp, startp := 0, i.p
|
||||
var prevCC uint8
|
||||
ss := mkStreamSafe(i.info)
|
||||
for {
|
||||
if !i.info.isYesC() {
|
||||
goto doNorm
|
||||
|
@ -385,11 +389,12 @@ func nextComposed(i *Iter) []byte {
|
|||
i.setDone()
|
||||
break
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.rb.ss = 0
|
||||
i.next = i.asciiF
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := ss.next(i.info); v == ssStarter {
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJCompose
|
||||
|
@ -401,8 +406,10 @@ func nextComposed(i *Iter) []byte {
|
|||
}
|
||||
return i.returnSlice(startp, i.p)
|
||||
doNorm:
|
||||
// reset to start position
|
||||
i.p = startp
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
if i.info.multiSegment() {
|
||||
d := i.info.Decomposition()
|
||||
info := i.rb.f.info(input{bytes: d}, 0)
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Note: the file data_test.go that is generated should not be checked in.
|
||||
//go:generate go run maketables.go triegen.go
|
||||
//go:generate go run maketables.go triegen.go -test
|
||||
//go:generate go test -tags test
|
||||
|
||||
// Package norm contains types and functions for normalizing Unicode strings.
|
||||
package norm // import "golang.org/x/text/unicode/norm"
|
||||
|
@ -323,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
|
|||
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
||||
switch ss.next(info) {
|
||||
case ssStarter:
|
||||
ss.first(info)
|
||||
lastSegStart = i
|
||||
case ssOverflow:
|
||||
return lastSegStart, false
|
||||
|
@ -440,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
|
|||
}
|
||||
return -1
|
||||
}
|
||||
// TODO: Using streamSafe to determine the boundary isn't the same as
|
||||
// using BoundaryBefore. Determine which should be used.
|
||||
if s := ss.next(info); s != ssSuccess {
|
||||
return i
|
||||
}
|
||||
|
@ -504,16 +506,15 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
|
|||
if info.size == 0 {
|
||||
return 0
|
||||
}
|
||||
if rb.nrune > 0 {
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
// TODO: this could be removed if we don't support merging.
|
||||
if rb.nrune > 0 {
|
||||
goto end
|
||||
}
|
||||
} else if s == ssOverflow {
|
||||
rb.insertCGJ()
|
||||
goto end
|
||||
}
|
||||
} else {
|
||||
rb.ss.first(info)
|
||||
}
|
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
||||
return int(err)
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -40,7 +40,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
|
|||
}
|
||||
|
||||
func flushTransform(rb *reorderBuffer) bool {
|
||||
// Write out (must fully fit in dst, or else it is a ErrShortDst).
|
||||
// Write out (must fully fit in dst, or else it is an ErrShortDst).
|
||||
if len(rb.out) < rb.nrune*utf8.UTFMax {
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
// Code generated by "stringer -type=Kind"; DO NOT EDIT.
|
||||
|
||||
package width
|
||||
|
||||
import "fmt"
|
||||
|
||||
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
|
||||
|
||||
var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
|
||||
|
||||
func (i Kind) String() string {
|
||||
if i < 0 || i >= Kind(len(_Kind_index)-1) {
|
||||
return fmt.Sprintf("Kind(%d)", i)
|
||||
}
|
||||
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,239 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package width
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
type foldTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
if src[n] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if elem(v)&tagNeedsFold != 0 {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
start, end := nSrc, len(src)
|
||||
if d := len(dst) - nDst; d < end-start {
|
||||
end = nSrc + d
|
||||
}
|
||||
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||
}
|
||||
n := copy(dst[nDst:], src[start:nSrc])
|
||||
if nDst += n; nDst == len(dst) {
|
||||
nSrc = start + n
|
||||
if nSrc == len(src) {
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if elem(v)&tagNeedsFold == 0 {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
|
||||
type narrowTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
if src[n] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
// ASCII fast path.
|
||||
start, end := nSrc, len(src)
|
||||
if d := len(dst) - nDst; d < end-start {
|
||||
end = nSrc + d
|
||||
}
|
||||
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||
}
|
||||
n := copy(dst[nDst:], src[start:nSrc])
|
||||
if nDst += n; nDst == len(dst) {
|
||||
nSrc = start + n
|
||||
if nSrc == len(src) {
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
if src[nSrc] < utf8.RuneSelf {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
||||
|
||||
type wideTransform struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||
// not enough to warrant the extra code and complexity.
|
||||
v, size := trie.lookup(src[n:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
n = len(src)
|
||||
}
|
||||
break
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||
// not enough to warrant the extra code and complexity.
|
||||
v, size := trie.lookup(src[nSrc:])
|
||||
if size == 0 { // incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1 // gobble 1 byte
|
||||
}
|
||||
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
} else {
|
||||
data := inverseData[byte(v)]
|
||||
if len(dst)-nDst < int(data[0]) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
i := 1
|
||||
for end := int(data[0]); i < end; i++ {
|
||||
dst[nDst] = data[i]
|
||||
nDst++
|
||||
}
|
||||
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package width
|
||||
|
||||
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||
// the inverseData array.
|
||||
type elem uint16
|
||||
|
||||
const (
|
||||
tagNeutral elem = iota << typeShift
|
||||
tagAmbiguous
|
||||
tagWide
|
||||
tagNarrow
|
||||
tagFullwidth
|
||||
tagHalfwidth
|
||||
)
|
||||
|
||||
const (
|
||||
numTypeBits = 3
|
||||
typeShift = 16 - numTypeBits
|
||||
|
||||
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||
// the Won sign U+20A9.
|
||||
tagNeedsFold = 0x1000
|
||||
|
||||
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||
// variant.
|
||||
wonSign rune = 0x20A9
|
||||
)
|
|
@ -0,0 +1,206 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate stringer -type=Kind
|
||||
//go:generate go run gen.go gen_common.go gen_trieval.go
|
||||
|
||||
// Package width provides functionality for handling different widths in text.
|
||||
//
|
||||
// Wide characters behave like ideographs; they tend to allow line breaks after
|
||||
// each character and remain upright in vertical text layout. Narrow characters
|
||||
// are kept together in words or runs that are rotated sideways in vertical text
|
||||
// layout.
|
||||
//
|
||||
// For more information, see http://unicode.org/reports/tr11/.
|
||||
package width // import "golang.org/x/text/width"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO
|
||||
// 1) Reduce table size by compressing blocks.
|
||||
// 2) API proposition for computing display length
|
||||
// (approximation, fixed pitch only).
|
||||
// 3) Implement display length.
|
||||
|
||||
// Kind indicates the type of width property as defined in http://unicode.org/reports/tr11/.
|
||||
type Kind int
|
||||
|
||||
const (
|
||||
// Neutral characters do not occur in legacy East Asian character sets.
|
||||
Neutral Kind = iota
|
||||
|
||||
// EastAsianAmbiguous characters that can be sometimes wide and sometimes
|
||||
// narrow and require additional information not contained in the character
|
||||
// code to further resolve their width.
|
||||
EastAsianAmbiguous
|
||||
|
||||
// EastAsianWide characters are wide in its usual form. They occur only in
|
||||
// the context of East Asian typography. These runes may have explicit
|
||||
// halfwidth counterparts.
|
||||
EastAsianWide
|
||||
|
||||
// EastAsianNarrow characters are narrow in its usual form. They often have
|
||||
// fullwidth counterparts.
|
||||
EastAsianNarrow
|
||||
|
||||
// Note: there exist Narrow runes that do not have fullwidth or wide
|
||||
// counterparts, despite what the definition says (e.g. U+27E6).
|
||||
|
||||
// EastAsianFullwidth characters have a compatibility decompositions of type
|
||||
// wide that map to a narrow counterpart.
|
||||
EastAsianFullwidth
|
||||
|
||||
// EastAsianHalfwidth characters have a compatibility decomposition of type
|
||||
// narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
|
||||
// SIGN.
|
||||
EastAsianHalfwidth
|
||||
|
||||
// Note: there exist runes that have a halfwidth counterparts but that are
|
||||
// classified as Ambiguous, rather than wide (e.g. U+2190).
|
||||
)
|
||||
|
||||
// TODO: the generated tries need to return size 1 for invalid runes for the
|
||||
// width to be computed correctly (each byte should render width 1)
|
||||
|
||||
var trie = newWidthTrie(0)
|
||||
|
||||
// Lookup reports the Properties of the first rune in b and the number of bytes
|
||||
// of its UTF-8 encoding.
|
||||
func Lookup(b []byte) (p Properties, size int) {
|
||||
v, sz := trie.lookup(b)
|
||||
return Properties{elem(v), b[sz-1]}, sz
|
||||
}
|
||||
|
||||
// LookupString reports the Properties of the first rune in s and the number of
|
||||
// bytes of its UTF-8 encoding.
|
||||
func LookupString(s string) (p Properties, size int) {
|
||||
v, sz := trie.lookupString(s)
|
||||
return Properties{elem(v), s[sz-1]}, sz
|
||||
}
|
||||
|
||||
// LookupRune reports the Properties of rune r.
|
||||
func LookupRune(r rune) Properties {
|
||||
var buf [4]byte
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
v, _ := trie.lookup(buf[:n])
|
||||
last := byte(r)
|
||||
if r >= utf8.RuneSelf {
|
||||
last = 0x80 + byte(r&0x3f)
|
||||
}
|
||||
return Properties{elem(v), last}
|
||||
}
|
||||
|
||||
// Properties provides access to width properties of a rune.
|
||||
type Properties struct {
|
||||
elem elem
|
||||
last byte
|
||||
}
|
||||
|
||||
func (e elem) kind() Kind {
|
||||
return Kind(e >> typeShift)
|
||||
}
|
||||
|
||||
// Kind returns the Kind of a rune as defined in Unicode TR #11.
|
||||
// See http://unicode.org/reports/tr11/ for more details.
|
||||
func (p Properties) Kind() Kind {
|
||||
return p.elem.kind()
|
||||
}
|
||||
|
||||
// Folded returns the folded variant of a rune or 0 if the rune is canonical.
|
||||
func (p Properties) Folded() rune {
|
||||
if p.elem&tagNeedsFold != 0 {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Narrow returns the narrow variant of a rune or 0 if the rune is already
|
||||
// narrow or doesn't have a narrow variant.
|
||||
func (p Properties) Narrow() rune {
|
||||
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Wide returns the wide variant of a rune or 0 if the rune is already
|
||||
// wide or doesn't have a wide variant.
|
||||
func (p Properties) Wide() rune {
|
||||
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
|
||||
buf := inverseData[byte(p.elem)]
|
||||
buf[buf[0]] ^= p.last
|
||||
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||
return r
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// TODO for Properties:
|
||||
// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
|
||||
// mapping.
|
||||
// - Add width information (including information on non-spacing runes).
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
// Reset implements the transform.Transformer interface.
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface.
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Span implements the transform.SpanningTransformer interface.
|
||||
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return t.t.Span(src, atEOF)
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying t to b.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying t to s.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, _ = transform.String(t, s)
|
||||
return s
|
||||
}
|
||||
|
||||
var (
|
||||
// Fold is a transform that maps all runes to their canonical width.
|
||||
//
|
||||
// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
|
||||
// provide a more generic folding mechanism.
|
||||
Fold Transformer = Transformer{foldTransform{}}
|
||||
|
||||
// Widen is a transform that maps runes to their wide variant, if
|
||||
// available.
|
||||
Widen Transformer = Transformer{wideTransform{}}
|
||||
|
||||
// Narrow is a transform that maps runes to their narrow variant, if
|
||||
// available.
|
||||
Narrow Transformer = Transformer{narrowTransform{}}
|
||||
)
|
||||
|
||||
// TODO: Consider the following options:
|
||||
// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
|
||||
// generalized variant of this.
|
||||
// - Consider a wide Won character to be the default width (or some generalized
|
||||
// variant of this).
|
||||
// - Filter the set of characters that gets converted (the preferred approach is
|
||||
// to allow applying filters to transforms).
|
Loading…
Reference in New Issue