mirror of https://github.com/docker/cli.git
Refactor stringutils and fix docker search output form when the description has CJK character
Signed-off-by: Chao Wang <wangchao.fnst@cn.fujitsu.com>
This commit is contained in:
parent
6ef0ea82ea
commit
926b20fcb5
|
@ -10,7 +10,6 @@ import (
|
||||||
"github.com/docker/distribution/reference"
|
"github.com/docker/distribution/reference"
|
||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
"github.com/docker/go-units"
|
"github.com/docker/go-units"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -165,7 +164,7 @@ func (c *containerContext) Image() string {
|
||||||
func (c *containerContext) Command() string {
|
func (c *containerContext) Command() string {
|
||||||
command := c.c.Command
|
command := c.c.Command
|
||||||
if c.trunc {
|
if c.trunc {
|
||||||
command = stringutils.Ellipsis(command, 20)
|
command = Ellipsis(command, 20)
|
||||||
}
|
}
|
||||||
return strconv.Quote(command)
|
return strconv.Quote(command)
|
||||||
}
|
}
|
||||||
|
@ -227,7 +226,7 @@ func (c *containerContext) Mounts() string {
|
||||||
name = m.Name
|
name = m.Name
|
||||||
}
|
}
|
||||||
if c.trunc {
|
if c.trunc {
|
||||||
name = stringutils.Ellipsis(name, 15)
|
name = Ellipsis(name, 15)
|
||||||
}
|
}
|
||||||
mounts = append(mounts, name)
|
mounts = append(mounts, name)
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,7 @@ func TestContainerPsContext(t *testing.T) {
|
||||||
Source: "/a/path",
|
Source: "/a/path",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, true, "this-is-a-lo...", ctx.Mounts},
|
}, true, "this-is-a-long…", ctx.Mounts},
|
||||||
{types.Container{
|
{types.Container{
|
||||||
Mounts: []types.MountPoint{
|
Mounts: []types.MountPoint{
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
package formatter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/width"
|
||||||
|
)
|
||||||
|
|
||||||
|
// charWidth returns the number of horizontal positions a character occupies,
|
||||||
|
// and is used to account for wide characters when displaying strings.
|
||||||
|
//
|
||||||
|
// In a broad sense, wide characters include East Asian Wide, East Asian Full-width,
|
||||||
|
// (when not in East Asian context) see http://unicode.org/reports/tr11/.
|
||||||
|
func charWidth(r rune) int {
|
||||||
|
switch width.LookupRune(r).Kind() {
|
||||||
|
case width.EastAsianWide, width.EastAsianFullwidth:
|
||||||
|
return 2
|
||||||
|
default:
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ellipsis truncates a string to fit within maxDisplayWidth, and appends ellipsis (…).
|
||||||
|
// For maxDisplayWidth of 1 and lower, no ellipsis is appended.
|
||||||
|
// For maxDisplayWidth of 1, first char of string will return even if its width > 1.
|
||||||
|
func Ellipsis(s string, maxDisplayWidth int) string {
|
||||||
|
if maxDisplayWidth <= 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
rs := []rune(s)
|
||||||
|
if maxDisplayWidth == 1 {
|
||||||
|
return string(rs[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
byteLen := len(s)
|
||||||
|
if byteLen == utf8.RuneCountInString(s) {
|
||||||
|
if byteLen <= maxDisplayWidth {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return string(rs[:maxDisplayWidth-1]) + "…"
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
display []int
|
||||||
|
displayWidth int
|
||||||
|
)
|
||||||
|
for _, r := range rs {
|
||||||
|
cw := charWidth(r)
|
||||||
|
displayWidth += cw
|
||||||
|
display = append(display, displayWidth)
|
||||||
|
}
|
||||||
|
if displayWidth <= maxDisplayWidth {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
for i := range display {
|
||||||
|
if display[i] <= maxDisplayWidth-1 && display[i+1] > maxDisplayWidth-1 {
|
||||||
|
return string(rs[:i+1]) + "…"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
package formatter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestEllipsis(t *testing.T) {
|
||||||
|
var testcases = []struct {
|
||||||
|
source string
|
||||||
|
width int
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{source: "t🐳ststring", width: 0, expected: ""},
|
||||||
|
{source: "t🐳ststring", width: 1, expected: "t"},
|
||||||
|
{source: "t🐳ststring", width: 2, expected: "t…"},
|
||||||
|
{source: "t🐳ststring", width: 6, expected: "t🐳st…"},
|
||||||
|
{source: "t🐳ststring", width: 20, expected: "t🐳ststring"},
|
||||||
|
{source: "你好世界teststring", width: 0, expected: ""},
|
||||||
|
{source: "你好世界teststring", width: 1, expected: "你"},
|
||||||
|
{source: "你好世界teststring", width: 3, expected: "你…"},
|
||||||
|
{source: "你好世界teststring", width: 6, expected: "你好…"},
|
||||||
|
{source: "你好世界teststring", width: 20, expected: "你好世界teststring"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testcase := range testcases {
|
||||||
|
assert.Equal(t, testcase.expected, Ellipsis(testcase.source, testcase.width))
|
||||||
|
}
|
||||||
|
}
|
|
@ -7,7 +7,6 @@ import (
|
||||||
|
|
||||||
"github.com/docker/docker/api/types/image"
|
"github.com/docker/docker/api/types/image"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
units "github.com/docker/go-units"
|
units "github.com/docker/go-units"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -93,7 +92,7 @@ func (c *historyContext) CreatedSince() string {
|
||||||
func (c *historyContext) CreatedBy() string {
|
func (c *historyContext) CreatedBy() string {
|
||||||
createdBy := strings.Replace(c.h.CreatedBy, "\t", " ", -1)
|
createdBy := strings.Replace(c.h.CreatedBy, "\t", " ", -1)
|
||||||
if c.trunc {
|
if c.trunc {
|
||||||
return stringutils.Ellipsis(createdBy, 45)
|
return Ellipsis(createdBy, 45)
|
||||||
}
|
}
|
||||||
return createdBy
|
return createdBy
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,6 @@ import (
|
||||||
|
|
||||||
"github.com/docker/docker/api/types/image"
|
"github.com/docker/docker/api/types/image"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -96,7 +95,7 @@ func TestHistoryContext_CreatedBy(t *testing.T) {
|
||||||
historyContext{
|
historyContext{
|
||||||
h: image.HistoryResponseItem{CreatedBy: withTabs},
|
h: image.HistoryResponseItem{CreatedBy: withTabs},
|
||||||
trunc: true,
|
trunc: true,
|
||||||
}, stringutils.Ellipsis(expected, 45), ctx.CreatedBy,
|
}, Ellipsis(expected, 45), ctx.CreatedBy,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +190,7 @@ imageID3 24 hours ago /bin/bash ls
|
||||||
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
||||||
`
|
`
|
||||||
expectedTrunc := `IMAGE CREATED CREATED BY SIZE COMMENT
|
expectedTrunc := `IMAGE CREATED CREATED BY SIZE COMMENT
|
||||||
imageID1 24 hours ago /bin/bash ls && npm i && npm run test && k... 183MB Hi
|
imageID1 24 hours ago /bin/bash ls && npm i && npm run test && kar… 183MB Hi
|
||||||
imageID2 24 hours ago /bin/bash echo 183MB Hi
|
imageID2 24 hours ago /bin/bash echo 183MB Hi
|
||||||
imageID3 24 hours ago /bin/bash ls 183MB Hi
|
imageID3 24 hours ago /bin/bash ls 183MB Hi
|
||||||
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
imageID4 24 hours ago /bin/bash grep 183MB Hi
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
|
|
||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -80,7 +79,7 @@ func (c *pluginContext) Description() string {
|
||||||
desc := strings.Replace(c.p.Config.Description, "\n", "", -1)
|
desc := strings.Replace(c.p.Config.Description, "\n", "", -1)
|
||||||
desc = strings.Replace(desc, "\r", "", -1)
|
desc = strings.Replace(desc, "\r", "", -1)
|
||||||
if c.trunc {
|
if c.trunc {
|
||||||
desc = stringutils.Ellipsis(desc, 45)
|
desc = Ellipsis(desc, 45)
|
||||||
}
|
}
|
||||||
|
|
||||||
return desc
|
return desc
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
registry "github.com/docker/docker/api/types/registry"
|
registry "github.com/docker/docker/api/types/registry"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -73,7 +72,7 @@ func (c *searchContext) Description() string {
|
||||||
desc := strings.Replace(c.s.Description, "\n", " ", -1)
|
desc := strings.Replace(c.s.Description, "\n", " ", -1)
|
||||||
desc = strings.Replace(desc, "\r", " ", -1)
|
desc = strings.Replace(desc, "\r", " ", -1)
|
||||||
if c.trunc {
|
if c.trunc {
|
||||||
desc = stringutils.Ellipsis(desc, 45)
|
desc = Ellipsis(desc, 45)
|
||||||
}
|
}
|
||||||
return desc
|
return desc
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,7 +7,6 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
registrytypes "github.com/docker/docker/api/types/registry"
|
registrytypes "github.com/docker/docker/api/types/registry"
|
||||||
"github.com/docker/docker/pkg/stringutils"
|
|
||||||
"github.com/gotestyourself/gotestyourself/golden"
|
"github.com/gotestyourself/gotestyourself/golden"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
@ -79,7 +78,7 @@ func TestSearchContextDescription(t *testing.T) {
|
||||||
{searchContext{
|
{searchContext{
|
||||||
s: registrytypes.SearchResult{Description: longDescription},
|
s: registrytypes.SearchResult{Description: longDescription},
|
||||||
trunc: true,
|
trunc: true,
|
||||||
}, stringutils.Ellipsis(longDescription, 45), ctx.Description},
|
}, Ellipsis(longDescription, 45), ctx.Description},
|
||||||
{searchContext{
|
{searchContext{
|
||||||
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
||||||
trunc: false,
|
trunc: false,
|
||||||
|
@ -87,7 +86,7 @@ func TestSearchContextDescription(t *testing.T) {
|
||||||
{searchContext{
|
{searchContext{
|
||||||
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
s: registrytypes.SearchResult{Description: descriptionWReturns},
|
||||||
trunc: true,
|
trunc: true,
|
||||||
}, stringutils.Ellipsis(longDescription, 45), ctx.Description},
|
}, Ellipsis(longDescription, 45), ctx.Description},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
|
|
|
@ -48,7 +48,7 @@ golang.org/x/crypto 558b6879de74bc843225cde5686419267ff707ca
|
||||||
golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
|
golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
|
||||||
golang.org/x/sync 450f422ab23cf9881c94e2db30cac0eb1b7cf80c
|
golang.org/x/sync 450f422ab23cf9881c94e2db30cac0eb1b7cf80c
|
||||||
golang.org/x/sys 07c182904dbd53199946ba614a412c61d3c548f5
|
golang.org/x/sys 07c182904dbd53199946ba614a412c61d3c548f5
|
||||||
golang.org/x/text f72d8390a633d5dfb0cc84043294db9f6c935756
|
golang.org/x/text 825fc78a2fd6fa0a5447e300189e3219e05e1f25
|
||||||
golang.org/x/time a4bde12657593d5e90d0533a3e4fd95e635124cb
|
golang.org/x/time a4bde12657593d5e90d0533a3e4fd95e635124cb
|
||||||
google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944
|
google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944
|
||||||
google.golang.org/grpc v1.3.0
|
google.golang.org/grpc v1.3.0
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
This package provides helper functions for dealing with strings
|
|
|
@ -1,99 +0,0 @@
|
||||||
// Package stringutils provides helper functions for dealing with strings.
|
|
||||||
package stringutils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"math/rand"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// GenerateRandomAlphaOnlyString returns a string of n characters, each picked
// with math/rand from the 52 ASCII letters a-z and A-Z.
func GenerateRandomAlphaOnlyString(n int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	out := make([]byte, n)
	for pos := 0; pos < n; pos++ {
		out[pos] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}
|
|
||||||
|
|
||||||
// GenerateRandomASCIIString returns a string of n characters, each picked with
// math/rand from a fixed ASCII alphabet: lower- and upper-case letters, a set
// of punctuation characters and the space. Digits are not in the alphabet.
func GenerateRandomASCIIString(n int) string {
	const (
		lower  = "abcdefghijklmnopqrstuvwxyz"
		upper  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		others = "~!@#$%^&*()-_+={}[]\\|<,>.?/\"';:` "
	)
	alphabet := lower + upper + others
	out := make([]byte, n)
	for pos := range out {
		out[pos] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}
|
|
||||||
|
|
||||||
// Ellipsis truncates s to at most maxlen runes. When s is longer than maxlen
// the last three kept positions are replaced by an ellipsis ("..."), so the
// result is exactly maxlen runes long.
//
// For maxlen of 3 and lower there is no room for an ellipsis, so s is simply
// cut to maxlen runes. A zero or negative maxlen yields "".
func Ellipsis(s string, maxlen int) string {
	// A negative maxlen would otherwise reach r[:maxlen] and panic.
	if maxlen <= 0 {
		return ""
	}
	r := []rune(s)
	if len(r) <= maxlen {
		return s
	}
	if maxlen <= 3 {
		return string(r[:maxlen])
	}
	return string(r[:maxlen-3]) + "..."
}
|
|
||||||
|
|
||||||
// Truncate cuts s down to at most maxlen runes and returns the result. A
// string that is already short enough is returned unchanged; a zero or
// negative maxlen yields "".
func Truncate(s string, maxlen int) string {
	// A negative maxlen would otherwise reach r[:maxlen] and panic.
	if maxlen <= 0 {
		return ""
	}
	r := []rune(s)
	if len(r) <= maxlen {
		return s
	}
	return string(r[:maxlen])
}
|
|
||||||
|
|
||||||
// InSlice reports whether s equals any element of slice, ignoring case.
//
// Elements are compared with strings.EqualFold (Unicode case folding), which
// avoids building lower-cased copies of both strings for every element.
func InSlice(slice []string, s string) bool {
	for _, candidate := range slice {
		if strings.EqualFold(s, candidate) {
			return true
		}
	}
	return false
}
|
|
||||||
|
|
||||||
// quote appends word to buf in a form a shell reads back as one argument.
// A non-empty word without shell-special characters is written verbatim.
// Anything else, including the empty word, is wrapped in single quotes, with
// every embedded single quote written as '\'' (close quote, escaped quote,
// reopen quote).
func quote(word string, buf *bytes.Buffer) {
	const special = "\\'\"`${[|&;<>()~*?! \t\n"
	needsQuoting := word == "" || strings.ContainsAny(word, special)
	if !needsQuoting {
		buf.WriteString(word)
		return
	}

	buf.WriteString("'")
	buf.WriteString(strings.Replace(word, "'", "'\\''", -1))
	buf.WriteString("'")
}

// ShellQuoteArguments joins args with single spaces, quoting each one with
// quote so that the resulting string can be passed to a program via a shell
// with every argument kept intact.
func ShellQuoteArguments(args []string) string {
	var out bytes.Buffer
	for idx := range args {
		if idx > 0 {
			out.WriteByte(' ')
		}
		quote(args[idx], &out)
	}
	return out.String()
}
|
|
|
@ -1,23 +0,0 @@
|
||||||
This repository holds supplementary Go libraries for text processing, many involving Unicode.
|
|
||||||
|
|
||||||
To submit changes to this repository, see http://golang.org/doc/contribute.html.
|
|
||||||
|
|
||||||
To generate the tables in this repository (except for the encoding tables),
|
|
||||||
run go generate from this directory. By default tables are generated for the
|
|
||||||
Unicode version in core and the CLDR version defined in
|
|
||||||
golang.org/x/text/unicode/cldr.
|
|
||||||
|
|
||||||
Running go generate will as a side effect create a DATA subdirectory in this
|
|
||||||
directory which holds all files that are used as a source for generating the
|
|
||||||
tables. This directory will also serve as a cache.
|
|
||||||
|
|
||||||
Run
|
|
||||||
|
|
||||||
go test ./...
|
|
||||||
|
|
||||||
from this directory to run all tests. Add the "-tags icu" flag to also run
|
|
||||||
ICU conformance tests (if available). This requires that you have the correct
|
|
||||||
ICU version installed on your system.
|
|
||||||
|
|
||||||
TODO:
|
|
||||||
- updating unversioned source files.
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
# Go Text
|
||||||
|
|
||||||
|
This repository holds supplementary Go libraries for text processing, many involving Unicode.
|
||||||
|
|
||||||
|
## Semantic Versioning
|
||||||
|
This repo uses Semantic versioning (http://semver.org/), so
|
||||||
|
1. MAJOR version when you make incompatible API changes,
|
||||||
|
1. MINOR version when you add functionality in a backwards-compatible manner,
|
||||||
|
and
|
||||||
|
1. PATCH version when you make backwards-compatible bug fixes.
|
||||||
|
|
||||||
|
A Unicode major and minor version bump is mapped to a major version bump in
|
||||||
|
x/text.
|
||||||
|
A patch version bump in Unicode is mapped to a minor version bump in x/text.
|
||||||
|
Note that, consistent with the definitions in semver, until version 1.0.0 of
|
||||||
|
x/text is reached, the minor version is considered a major version.
|
||||||
|
So going from 0.1.0 to 0.2.0 is considered to be a major version bump.
|
||||||
|
|
||||||
|
A major new CLDR version is mapped to a minor version increase in x/text.
|
||||||
|
Any other new CLDR version is mapped to a patch version increase in x/text.
|
||||||
|
|
||||||
|
## Download/Install
|
||||||
|
|
||||||
|
The easiest way to install is to run `go get -u golang.org/x/text`. You can
|
||||||
|
also manually git clone the repository to `$GOPATH/src/golang.org/x/text`.
|
||||||
|
|
||||||
|
## Contribute
|
||||||
|
To submit changes to this repository, see http://golang.org/doc/contribute.html.
|
||||||
|
|
||||||
|
To generate the tables in this repository (except for the encoding tables),
|
||||||
|
run go generate from this directory. By default tables are generated for the
|
||||||
|
Unicode version in core and the CLDR version defined in
|
||||||
|
golang.org/x/text/unicode/cldr.
|
||||||
|
|
||||||
|
Running go generate will as a side effect create a DATA subdirectory in this
|
||||||
|
directory, which holds all files that are used as a source for generating the
|
||||||
|
tables. This directory will also serve as a cache.
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
Run
|
||||||
|
|
||||||
|
go test ./...
|
||||||
|
|
||||||
|
from this directory to run all tests. Add the "-tags icu" flag to also run
|
||||||
|
ICU conformance tests (if available). This requires that you have the correct
|
||||||
|
ICU version installed on your system.
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
- updating unversioned source files.
|
||||||
|
|
||||||
|
## Generating Tables
|
||||||
|
|
||||||
|
To generate the tables in this repository (except for the encoding
|
||||||
|
tables), run `go generate` from this directory. By default tables are
|
||||||
|
generated for the Unicode version in core and the CLDR version defined in
|
||||||
|
golang.org/x/text/unicode/cldr.
|
||||||
|
|
||||||
|
Running go generate will as a side effect create a DATA subdirectory in this
|
||||||
|
directory which holds all files that are used as a source for generating the
|
||||||
|
tables. This directory will also serve as a cache.
|
||||||
|
|
||||||
|
## Versions
|
||||||
|
To update a Unicode version run
|
||||||
|
|
||||||
|
UNICODE_VERSION=x.x.x go generate
|
||||||
|
|
||||||
|
where `x.x.x` must correspond to a directory in http://www.unicode.org/Public/.
|
||||||
|
If this version is newer than the version in core it will also update the
|
||||||
|
relevant packages there. The idna package in x/net will always be updated.
|
||||||
|
|
||||||
|
To update a CLDR version run
|
||||||
|
|
||||||
|
CLDR_VERSION=version go generate
|
||||||
|
|
||||||
|
where `version` must correspond to a directory in
|
||||||
|
http://www.unicode.org/Public/cldr/.
|
||||||
|
|
||||||
|
Note that the code gets adapted over time to changes in the data and that
|
||||||
|
backwards compatibility is not maintained.
|
||||||
|
So updating to a different version may not work.
|
||||||
|
|
||||||
|
The files in DATA/{iana|icu|w3|whatwg} are currently not versioned.
|
||||||
|
|
||||||
|
## Report Issues / Send Patches
|
||||||
|
|
||||||
|
This repository uses Gerrit for code changes. To learn how to submit changes to
|
||||||
|
this repository, see https://golang.org/doc/contribute.html.
|
||||||
|
|
||||||
|
The main issue tracker for the text repository is located at
|
||||||
|
https://github.com/golang/go/issues. Prefix your issue with "x/text:" in the
|
||||||
|
subject line, so it is easy to find.
|
|
@ -155,6 +155,7 @@ func DirectionString(s string) bidi.Direction {
|
||||||
e, sz := bidi.LookupString(s[i:])
|
e, sz := bidi.LookupString(s[i:])
|
||||||
if sz == 0 {
|
if sz == 0 {
|
||||||
i++
|
i++
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
c := e.Class()
|
c := e.Class()
|
||||||
if c == bidi.R || c == bidi.AL || c == bidi.AN {
|
if c == bidi.R || c == bidi.AL || c == bidi.AN {
|
||||||
|
@ -203,9 +204,6 @@ func (t *Transformer) isRTL() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Transformer) isFinal() bool {
|
func (t *Transformer) isFinal() bool {
|
||||||
if !t.isRTL() {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
|
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -726,7 +726,7 @@ loop:
|
||||||
continue loop
|
continue loop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Panicf("invalid bidi code %s present in assertOnly at position %d", t, s.indexes[i])
|
log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
// This file was generated by go generate; DO NOT EDIT
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
package bidi
|
package bidi
|
||||||
|
|
||||||
|
|
|
@ -33,17 +33,9 @@ const (
|
||||||
// streamSafe implements the policy of when a CGJ should be inserted.
|
// streamSafe implements the policy of when a CGJ should be inserted.
|
||||||
type streamSafe uint8
|
type streamSafe uint8
|
||||||
|
|
||||||
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
|
// first inserts the first rune of a segment. It is a faster version of next if
|
||||||
// first on it.
|
// it is known p represents the first rune in a segment.
|
||||||
func mkStreamSafe(p Properties) streamSafe {
|
|
||||||
return streamSafe(p.nTrailingNonStarters())
|
|
||||||
}
|
|
||||||
|
|
||||||
// first inserts the first rune of a segment.
|
|
||||||
func (ss *streamSafe) first(p Properties) {
|
func (ss *streamSafe) first(p Properties) {
|
||||||
if *ss != 0 {
|
|
||||||
panic("!= 0")
|
|
||||||
}
|
|
||||||
*ss = streamSafe(p.nTrailingNonStarters())
|
*ss = streamSafe(p.nTrailingNonStarters())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,7 +58,7 @@ func (ss *streamSafe) next(p Properties) ssState {
|
||||||
// be a non-starter. Note that it always hold that if nLead > 0 then
|
// be a non-starter. Note that it always hold that if nLead > 0 then
|
||||||
// nLead == nTrail.
|
// nLead == nTrail.
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
*ss = 0
|
*ss = streamSafe(p.nTrailingNonStarters())
|
||||||
return ssStarter
|
return ssStarter
|
||||||
}
|
}
|
||||||
return ssSuccess
|
return ssSuccess
|
||||||
|
@ -142,7 +134,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
|
||||||
func (rb *reorderBuffer) reset() {
|
func (rb *reorderBuffer) reset() {
|
||||||
rb.nrune = 0
|
rb.nrune = 0
|
||||||
rb.nbyte = 0
|
rb.nbyte = 0
|
||||||
rb.ss = 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rb *reorderBuffer) doFlush() bool {
|
func (rb *reorderBuffer) doFlush() bool {
|
||||||
|
@ -257,6 +248,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
|
||||||
// It flushes the buffer on each new segment start.
|
// It flushes the buffer on each new segment start.
|
||||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
|
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
|
||||||
rb.tmpBytes.setBytes(dcomp)
|
rb.tmpBytes.setBytes(dcomp)
|
||||||
|
// As the streamSafe accounting already handles the counting for modifiers,
|
||||||
|
// we don't have to call next. However, we do need to keep the accounting
|
||||||
|
// intact when flushing the buffer.
|
||||||
for i := 0; i < len(dcomp); {
|
for i := 0; i < len(dcomp); {
|
||||||
info := rb.f.info(rb.tmpBytes, i)
|
info := rb.f.info(rb.tmpBytes, i)
|
||||||
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
|
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
|
||||||
|
|
|
@ -10,7 +10,7 @@ package norm
|
||||||
// and its corresponding decomposing form share the same trie. Each trie maps
|
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||||
// bits
|
// bits
|
||||||
// 15: 1 (inverse of NFD_QD bit of qcInfo)
|
// 15: 1 (inverse of NFD_QC bit of qcInfo)
|
||||||
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
|
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
|
||||||
// 6..0: ccc (compressed CCC value).
|
// 6..0: ccc (compressed CCC value).
|
||||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||||
|
@ -56,28 +56,31 @@ type formInfo struct {
|
||||||
nextMain iterFunc
|
nextMain iterFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
var formTable []*formInfo
|
var formTable = []*formInfo{{
|
||||||
|
form: NFC,
|
||||||
func init() {
|
composing: true,
|
||||||
formTable = make([]*formInfo, 4)
|
compatibility: false,
|
||||||
|
info: lookupInfoNFC,
|
||||||
for i := range formTable {
|
nextMain: nextComposed,
|
||||||
f := &formInfo{}
|
}, {
|
||||||
formTable[i] = f
|
form: NFD,
|
||||||
f.form = Form(i)
|
composing: false,
|
||||||
if Form(i) == NFKD || Form(i) == NFKC {
|
compatibility: false,
|
||||||
f.compatibility = true
|
info: lookupInfoNFC,
|
||||||
f.info = lookupInfoNFKC
|
nextMain: nextDecomposed,
|
||||||
} else {
|
}, {
|
||||||
f.info = lookupInfoNFC
|
form: NFKC,
|
||||||
}
|
composing: true,
|
||||||
f.nextMain = nextDecomposed
|
compatibility: true,
|
||||||
if Form(i) == NFC || Form(i) == NFKC {
|
info: lookupInfoNFKC,
|
||||||
f.nextMain = nextComposed
|
nextMain: nextComposed,
|
||||||
f.composing = true
|
}, {
|
||||||
}
|
form: NFKD,
|
||||||
}
|
composing: false,
|
||||||
}
|
compatibility: true,
|
||||||
|
info: lookupInfoNFKC,
|
||||||
|
nextMain: nextDecomposed,
|
||||||
|
}}
|
||||||
|
|
||||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||||
|
|
|
@ -90,16 +90,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (in *input) hangul(p int) (r rune) {
|
func (in *input) hangul(p int) (r rune) {
|
||||||
|
var size int
|
||||||
if in.bytes == nil {
|
if in.bytes == nil {
|
||||||
if !isHangulString(in.str[p:]) {
|
if !isHangulString(in.str[p:]) {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
r, _ = utf8.DecodeRuneInString(in.str[p:])
|
r, size = utf8.DecodeRuneInString(in.str[p:])
|
||||||
} else {
|
} else {
|
||||||
if !isHangul(in.bytes[p:]) {
|
if !isHangul(in.bytes[p:]) {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
r, _ = utf8.DecodeRune(in.bytes[p:])
|
r, size = utf8.DecodeRune(in.bytes[p:])
|
||||||
|
}
|
||||||
|
if size != hangulUTF8Size {
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ func (i *Iter) Init(f Form, src []byte) {
|
||||||
i.next = i.rb.f.nextMain
|
i.next = i.rb.f.nextMain
|
||||||
i.asciiF = nextASCIIBytes
|
i.asciiF = nextASCIIBytes
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
|
i.rb.ss.first(i.info)
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||||
|
@ -56,11 +57,12 @@ func (i *Iter) InitString(f Form, src string) {
|
||||||
i.next = i.rb.f.nextMain
|
i.next = i.rb.f.nextMain
|
||||||
i.asciiF = nextASCIIString
|
i.asciiF = nextASCIIString
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
|
i.rb.ss.first(i.info)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Seek sets the segment to be returned by the next call to Next to start
|
// Seek sets the segment to be returned by the next call to Next to start
|
||||||
// at position p. It is the responsibility of the caller to set p to the
|
// at position p. It is the responsibility of the caller to set p to the
|
||||||
// start of a UTF8 rune.
|
// start of a segment.
|
||||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||||
var abs int64
|
var abs int64
|
||||||
switch whence {
|
switch whence {
|
||||||
|
@ -84,6 +86,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||||
i.multiSeg = nil
|
i.multiSeg = nil
|
||||||
i.next = i.rb.f.nextMain
|
i.next = i.rb.f.nextMain
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
|
i.rb.ss.first(i.info)
|
||||||
return abs, nil
|
return abs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,6 +164,7 @@ func nextHangul(i *Iter) []byte {
|
||||||
if next >= i.rb.nsrc {
|
if next >= i.rb.nsrc {
|
||||||
i.setDone()
|
i.setDone()
|
||||||
} else if i.rb.src.hangul(next) == 0 {
|
} else if i.rb.src.hangul(next) == 0 {
|
||||||
|
i.rb.ss.next(i.info)
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
i.next = i.rb.f.nextMain
|
i.next = i.rb.f.nextMain
|
||||||
return i.next(i)
|
return i.next(i)
|
||||||
|
@ -204,12 +208,10 @@ func nextMultiNorm(i *Iter) []byte {
|
||||||
if info.BoundaryBefore() {
|
if info.BoundaryBefore() {
|
||||||
i.rb.compose()
|
i.rb.compose()
|
||||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||||
i.rb.ss.first(info)
|
|
||||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||||
i.multiSeg = d[j+int(info.size):]
|
i.multiSeg = d[j+int(info.size):]
|
||||||
return seg
|
return seg
|
||||||
}
|
}
|
||||||
i.rb.ss.next(info)
|
|
||||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||||
j += int(info.size)
|
j += int(info.size)
|
||||||
}
|
}
|
||||||
|
@ -222,9 +224,9 @@ func nextMultiNorm(i *Iter) []byte {
|
||||||
func nextDecomposed(i *Iter) (next []byte) {
|
func nextDecomposed(i *Iter) (next []byte) {
|
||||||
outp := 0
|
outp := 0
|
||||||
inCopyStart, outCopyStart := i.p, 0
|
inCopyStart, outCopyStart := i.p, 0
|
||||||
ss := mkStreamSafe(i.info)
|
|
||||||
for {
|
for {
|
||||||
if sz := int(i.info.size); sz <= 1 {
|
if sz := int(i.info.size); sz <= 1 {
|
||||||
|
i.rb.ss = 0
|
||||||
p := i.p
|
p := i.p
|
||||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||||
if i.p >= i.rb.nsrc {
|
if i.p >= i.rb.nsrc {
|
||||||
|
@ -243,6 +245,8 @@ func nextDecomposed(i *Iter) (next []byte) {
|
||||||
p := outp + len(d)
|
p := outp + len(d)
|
||||||
if outp > 0 {
|
if outp > 0 {
|
||||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||||
|
// TODO: this condition should not be possible, but we leave it
|
||||||
|
// in for defensive purposes.
|
||||||
if p > len(i.buf) {
|
if p > len(i.buf) {
|
||||||
return i.buf[:outp]
|
return i.buf[:outp]
|
||||||
}
|
}
|
||||||
|
@ -266,7 +270,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
||||||
} else {
|
} else {
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
}
|
}
|
||||||
switch ss.next(i.info) {
|
switch i.rb.ss.next(i.info) {
|
||||||
case ssOverflow:
|
case ssOverflow:
|
||||||
i.next = nextCGJDecompose
|
i.next = nextCGJDecompose
|
||||||
fallthrough
|
fallthrough
|
||||||
|
@ -309,7 +313,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
||||||
}
|
}
|
||||||
prevCC := i.info.tccc
|
prevCC := i.info.tccc
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
if v := ss.next(i.info); v == ssStarter {
|
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||||
break
|
break
|
||||||
} else if v == ssOverflow {
|
} else if v == ssOverflow {
|
||||||
i.next = nextCGJDecompose
|
i.next = nextCGJDecompose
|
||||||
|
@ -335,10 +339,6 @@ doNorm:
|
||||||
|
|
||||||
func doNormDecomposed(i *Iter) []byte {
|
func doNormDecomposed(i *Iter) []byte {
|
||||||
for {
|
for {
|
||||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
|
||||||
i.next = nextCGJDecompose
|
|
||||||
break
|
|
||||||
}
|
|
||||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||||
i.setDone()
|
i.setDone()
|
||||||
|
@ -348,6 +348,10 @@ func doNormDecomposed(i *Iter) []byte {
|
||||||
if i.info.ccc == 0 {
|
if i.info.ccc == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||||
|
i.next = nextCGJDecompose
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// new segment or too many combining characters: exit normalization
|
// new segment or too many combining characters: exit normalization
|
||||||
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||||
|
@ -357,6 +361,7 @@ func nextCGJDecompose(i *Iter) []byte {
|
||||||
i.rb.ss = 0
|
i.rb.ss = 0
|
||||||
i.rb.insertCGJ()
|
i.rb.insertCGJ()
|
||||||
i.next = nextDecomposed
|
i.next = nextDecomposed
|
||||||
|
i.rb.ss.first(i.info)
|
||||||
buf := doNormDecomposed(i)
|
buf := doNormDecomposed(i)
|
||||||
return buf
|
return buf
|
||||||
}
|
}
|
||||||
|
@ -365,7 +370,6 @@ func nextCGJDecompose(i *Iter) []byte {
|
||||||
func nextComposed(i *Iter) []byte {
|
func nextComposed(i *Iter) []byte {
|
||||||
outp, startp := 0, i.p
|
outp, startp := 0, i.p
|
||||||
var prevCC uint8
|
var prevCC uint8
|
||||||
ss := mkStreamSafe(i.info)
|
|
||||||
for {
|
for {
|
||||||
if !i.info.isYesC() {
|
if !i.info.isYesC() {
|
||||||
goto doNorm
|
goto doNorm
|
||||||
|
@ -385,11 +389,12 @@ func nextComposed(i *Iter) []byte {
|
||||||
i.setDone()
|
i.setDone()
|
||||||
break
|
break
|
||||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||||
|
i.rb.ss = 0
|
||||||
i.next = i.asciiF
|
i.next = i.asciiF
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
if v := ss.next(i.info); v == ssStarter {
|
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||||
break
|
break
|
||||||
} else if v == ssOverflow {
|
} else if v == ssOverflow {
|
||||||
i.next = nextCGJCompose
|
i.next = nextCGJCompose
|
||||||
|
@ -401,8 +406,10 @@ func nextComposed(i *Iter) []byte {
|
||||||
}
|
}
|
||||||
return i.returnSlice(startp, i.p)
|
return i.returnSlice(startp, i.p)
|
||||||
doNorm:
|
doNorm:
|
||||||
|
// reset to start position
|
||||||
i.p = startp
|
i.p = startp
|
||||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||||
|
i.rb.ss.first(i.info)
|
||||||
if i.info.multiSegment() {
|
if i.info.multiSegment() {
|
||||||
d := i.info.Decomposition()
|
d := i.info.Decomposition()
|
||||||
info := i.rb.f.info(input{bytes: d}, 0)
|
info := i.rb.f.info(input{bytes: d}, 0)
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Note: the file data_test.go that is generated should not be checked in.
|
||||||
//go:generate go run maketables.go triegen.go
|
//go:generate go run maketables.go triegen.go
|
||||||
//go:generate go run maketables.go triegen.go -test
|
//go:generate go test -tags test
|
||||||
|
|
||||||
// Package norm contains types and functions for normalizing Unicode strings.
|
// Package norm contains types and functions for normalizing Unicode strings.
|
||||||
package norm // import "golang.org/x/text/unicode/norm"
|
package norm // import "golang.org/x/text/unicode/norm"
|
||||||
|
@ -323,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
|
||||||
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
||||||
switch ss.next(info) {
|
switch ss.next(info) {
|
||||||
case ssStarter:
|
case ssStarter:
|
||||||
ss.first(info)
|
|
||||||
lastSegStart = i
|
lastSegStart = i
|
||||||
case ssOverflow:
|
case ssOverflow:
|
||||||
return lastSegStart, false
|
return lastSegStart, false
|
||||||
|
@ -440,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
// TODO: Using streamSafe to determine the boundary isn't the same as
|
||||||
|
// using BoundaryBefore. Determine which should be used.
|
||||||
if s := ss.next(info); s != ssSuccess {
|
if s := ss.next(info); s != ssSuccess {
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
@ -504,15 +506,14 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
|
||||||
if info.size == 0 {
|
if info.size == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
if rb.nrune > 0 {
|
if s := rb.ss.next(info); s == ssStarter {
|
||||||
if s := rb.ss.next(info); s == ssStarter {
|
// TODO: this could be removed if we don't support merging.
|
||||||
goto end
|
if rb.nrune > 0 {
|
||||||
} else if s == ssOverflow {
|
|
||||||
rb.insertCGJ()
|
|
||||||
goto end
|
goto end
|
||||||
}
|
}
|
||||||
} else {
|
} else if s == ssOverflow {
|
||||||
rb.ss.first(info)
|
rb.insertCGJ()
|
||||||
|
goto end
|
||||||
}
|
}
|
||||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
||||||
return int(err)
|
return int(err)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -40,7 +40,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func flushTransform(rb *reorderBuffer) bool {
|
func flushTransform(rb *reorderBuffer) bool {
|
||||||
// Write out (must fully fit in dst, or else it is a ErrShortDst).
|
// Write out (must fully fit in dst, or else it is an ErrShortDst).
|
||||||
if len(rb.out) < rb.nrune*utf8.UTFMax {
|
if len(rb.out) < rb.nrune*utf8.UTFMax {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
// Code generated by "stringer -type=Kind"; DO NOT EDIT.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
|
||||||
|
|
||||||
|
var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
|
||||||
|
|
||||||
|
func (i Kind) String() string {
|
||||||
|
if i < 0 || i >= Kind(len(_Kind_index)-1) {
|
||||||
|
return fmt.Sprintf("Kind(%d)", i)
|
||||||
|
}
|
||||||
|
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,239 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// foldTransform is the transformer wrapped by Fold. It has no fields of its
// own and embeds transform.NopResetter.
type foldTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
if src[n] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if elem(v)&tagNeedsFold != 0 {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
start, end := nSrc, len(src)
|
||||||
|
if d := len(dst) - nDst; d < end-start {
|
||||||
|
end = nSrc + d
|
||||||
|
}
|
||||||
|
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||||
|
}
|
||||||
|
n := copy(dst[nDst:], src[start:nSrc])
|
||||||
|
if nDst += n; nDst == len(dst) {
|
||||||
|
nSrc = start + n
|
||||||
|
if nSrc == len(src) {
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if elem(v)&tagNeedsFold == 0 {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// narrowTransform is the transformer wrapped by Narrow. It has no fields of
// its own and embeds transform.NopResetter.
type narrowTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
if src[n] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||||
|
} else {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
start, end := nSrc, len(src)
|
||||||
|
if d := len(dst) - nDst; d < end-start {
|
||||||
|
end = nSrc + d
|
||||||
|
}
|
||||||
|
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||||
|
}
|
||||||
|
n := copy(dst[nDst:], src[start:nSrc])
|
||||||
|
if nDst += n; nDst == len(dst) {
|
||||||
|
nSrc = start + n
|
||||||
|
if nSrc == len(src) {
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// wideTransform is the transformer wrapped by Widen. It has no fields of its
// own and embeds transform.NopResetter.
type wideTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||||
|
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||||
|
// not enough to warrant the extra code and complexity.
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||||
|
} else {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||||
|
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||||
|
// not enough to warrant the extra code and complexity.
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||||
|
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||||
|
// the inverseData array.
|
||||||
|
type elem uint16
|
||||||
|
|
||||||
|
// The tag constants are successive iota values shifted left by typeShift.
// They are declared in the same order as the Kind constants (Neutral,
// EastAsianAmbiguous, EastAsianWide, ...), which is what lets elem.kind
// recover the Kind by shifting an elem right by typeShift.
const (
|
||||||
|
tagNeutral elem = iota << typeShift
|
||||||
|
tagAmbiguous
|
||||||
|
tagWide
|
||||||
|
tagNarrow
|
||||||
|
tagFullwidth
|
||||||
|
tagHalfwidth
|
||||||
|
)
|
||||||
|
|
||||||
|
// Bit layout of an elem.
const (
|
||||||
|
// numTypeBits is the number of bits used for the width type tag.
numTypeBits = 3
|
||||||
|
// typeShift is the position of the lowest type bit (16 - 3 = 13).
typeShift = 16 - numTypeBits
|
||||||
|
|
||||||
|
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||||
|
// the Won sign U+20A9.
|
||||||
|
// Its value 0x1000 is bit 12, the bit directly below the type bits.
tagNeedsFold = 0x1000
|
||||||
|
|
||||||
|
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||||
|
// variant.
|
||||||
|
wonSign rune = 0x20A9
|
||||||
|
)
|
|
@ -0,0 +1,206 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate stringer -type=Kind
|
||||||
|
//go:generate go run gen.go gen_common.go gen_trieval.go
|
||||||
|
|
||||||
|
// Package width provides functionality for handling different widths in text.
|
||||||
|
//
|
||||||
|
// Wide characters behave like ideographs; they tend to allow line breaks after
|
||||||
|
// each character and remain upright in vertical text layout. Narrow characters
|
||||||
|
// are kept together in words or runs that are rotated sideways in vertical text
|
||||||
|
// layout.
|
||||||
|
//
|
||||||
|
// For more information, see http://unicode.org/reports/tr11/.
|
||||||
|
package width // import "golang.org/x/text/width"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
// 1) Reduce table size by compressing blocks.
|
||||||
|
// 2) API proposition for computing display length
|
||||||
|
// (approximation, fixed pitch only).
|
||||||
|
// 3) Implement display length.
|
||||||
|
|
||||||
|
// Kind indicates the type of width property as defined in http://unicode.org/reports/tr11/.
|
||||||
|
type Kind int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Neutral characters do not occur in legacy East Asian character sets.
|
||||||
|
Neutral Kind = iota
|
||||||
|
|
||||||
|
// EastAsianAmbiguous characters that can be sometimes wide and sometimes
|
||||||
|
// narrow and require additional information not contained in the character
|
||||||
|
// code to further resolve their width.
|
||||||
|
EastAsianAmbiguous
|
||||||
|
|
||||||
|
// EastAsianWide characters are wide in their usual form. They occur only in
|
||||||
|
// the context of East Asian typography. These runes may have explicit
|
||||||
|
// halfwidth counterparts.
|
||||||
|
EastAsianWide
|
||||||
|
|
||||||
|
// EastAsianNarrow characters are narrow in their usual form. They often have
|
||||||
|
// fullwidth counterparts.
|
||||||
|
EastAsianNarrow
|
||||||
|
|
||||||
|
// Note: there exist Narrow runes that do not have fullwidth or wide
|
||||||
|
// counterparts, despite what the definition says (e.g. U+27E6).
|
||||||
|
|
||||||
|
// EastAsianFullwidth characters have a compatibility decomposition of type
|
||||||
|
// wide that map to a narrow counterpart.
|
||||||
|
EastAsianFullwidth
|
||||||
|
|
||||||
|
// EastAsianHalfwidth characters have a compatibility decomposition of type
|
||||||
|
// narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
|
||||||
|
// SIGN.
|
||||||
|
EastAsianHalfwidth
|
||||||
|
|
||||||
|
// Note: there exist runes that have a halfwidth counterpart but that are
|
||||||
|
// classified as Ambiguous, rather than wide (e.g. U+2190).
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: the generated tries need to return size 1 for invalid runes for the
|
||||||
|
// width to be computed correctly (each byte should render width 1)
|
||||||
|
|
||||||
|
var trie = newWidthTrie(0)
|
||||||
|
|
||||||
|
// Lookup reports the Properties of the first rune in b and the number of bytes
|
||||||
|
// of its UTF-8 encoding.
|
||||||
|
func Lookup(b []byte) (p Properties, size int) {
|
||||||
|
v, sz := trie.lookup(b)
|
||||||
|
return Properties{elem(v), b[sz-1]}, sz
|
||||||
|
}
|
||||||
|
|
||||||
|
// LookupString reports the Properties of the first rune in s and the number of
|
||||||
|
// bytes of its UTF-8 encoding.
|
||||||
|
func LookupString(s string) (p Properties, size int) {
|
||||||
|
v, sz := trie.lookupString(s)
|
||||||
|
return Properties{elem(v), s[sz-1]}, sz
|
||||||
|
}
|
||||||
|
|
||||||
|
// LookupRune reports the Properties of rune r.
|
||||||
|
func LookupRune(r rune) Properties {
|
||||||
|
var buf [4]byte
|
||||||
|
n := utf8.EncodeRune(buf[:], r)
|
||||||
|
v, _ := trie.lookup(buf[:n])
|
||||||
|
last := byte(r)
|
||||||
|
if r >= utf8.RuneSelf {
|
||||||
|
last = 0x80 + byte(r&0x3f)
|
||||||
|
}
|
||||||
|
return Properties{elem(v), last}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Properties provides access to width properties of a rune.
|
||||||
|
type Properties struct {
|
||||||
|
// elem is the trie value looked up for the rune.
elem elem
|
||||||
|
// last is the final byte of the rune's UTF-8 encoding.
last byte
|
||||||
|
}
|
||||||
|
|
||||||
|
// kind returns the Kind encoded in e, obtained by shifting e right by
// typeShift.
func (e elem) kind() Kind {
|
||||||
|
return Kind(e >> typeShift)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kind returns the Kind of a rune as defined in Unicode TR #11.
|
||||||
|
// See http://unicode.org/reports/tr11/ for more details.
|
||||||
|
func (p Properties) Kind() Kind {
|
||||||
|
return p.elem.kind()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Folded returns the folded variant of a rune or 0 if the rune is canonical.
|
||||||
|
func (p Properties) Folded() rune {
|
||||||
|
if p.elem&tagNeedsFold != 0 {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Narrow returns the narrow variant of a rune or 0 if the rune is already
|
||||||
|
// narrow or doesn't have a narrow variant.
|
||||||
|
func (p Properties) Narrow() rune {
|
||||||
|
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wide returns the wide variant of a rune or 0 if the rune is already
|
||||||
|
// wide or doesn't have a wide variant.
|
||||||
|
func (p Properties) Wide() rune {
|
||||||
|
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO for Properties:
|
||||||
|
// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
|
||||||
|
// mapping.
|
||||||
|
// - Add width information (including information on non-spacing runes).
|
||||||
|
|
||||||
|
// Transformer implements the transform.Transformer interface.
|
||||||
|
type Transformer struct {
|
||||||
|
// t is the wrapped transformer; Reset, Transform and Span delegate to it.
t transform.SpanningTransformer
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset implements the transform.Transformer interface by calling Reset on
// the wrapped transformer.
|
||||||
|
func (t Transformer) Reset() { t.t.Reset() }
|
||||||
|
|
||||||
|
// Transform implements the transform.Transformer interface. It passes its
// arguments to the wrapped transformer and returns that call's results
// unchanged.
|
||||||
|
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
return t.t.Transform(dst, src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements the transform.SpanningTransformer interface. It passes its
// arguments to the wrapped transformer and returns that call's results
// unchanged.
|
||||||
|
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
return t.t.Span(src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of applying t to b.
|
||||||
|
func (t Transformer) Bytes(b []byte) []byte {
|
||||||
|
b, _, _ = transform.Bytes(t, b)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of applying t to s.
|
||||||
|
func (t Transformer) String(s string) string {
|
||||||
|
s, _, _ = transform.String(t, s)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Fold is a transform that maps all runes to their canonical width.
|
||||||
|
//
|
||||||
|
// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
|
||||||
|
// provide a more generic folding mechanism.
|
||||||
|
Fold Transformer = Transformer{foldTransform{}}
|
||||||
|
|
||||||
|
// Widen is a transform that maps runes to their wide variant, if
|
||||||
|
// available.
|
||||||
|
Widen Transformer = Transformer{wideTransform{}}
|
||||||
|
|
||||||
|
// Narrow is a transform that maps runes to their narrow variant, if
|
||||||
|
// available.
|
||||||
|
Narrow Transformer = Transformer{narrowTransform{}}
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: Consider the following options:
|
||||||
|
// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
|
||||||
|
// generalized variant of this.
|
||||||
|
// - Consider a wide Won character to be the default width (or some generalized
|
||||||
|
// variant of this).
|
||||||
|
// - Filter the set of characters that gets converted (the preferred approach is
|
||||||
|
// to allow applying filters to transforms).
|
Loading…
Reference in New Issue