synced with master
This commit is contained in:
commit
f889a3fcb3
|
@ -28,6 +28,15 @@
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## v0.19.1
|
||||||
|
|
||||||
|
- Fixed `tokenizer.Scan()/ScanAll()` to ignore the separators from the default trim cutset.
|
||||||
|
An option to also return the empty tokens was added via `Tokenizer.KeepEmptyTokens(true)`.
|
||||||
|
_This should fix the parsing of whitespace characters around view query column names when no quotes are used ([#3616](https://github.com/pocketbase/pocketbase/discussions/3616#discussioncomment-7398564))._
|
||||||
|
|
||||||
|
- Fixed the `:excerpt(max, withEllipsis?)` `field` query param modifier to properly add space to the generated text fragment after block tags.
|
||||||
|
|
||||||
|
|
||||||
## v0.19.0
|
## v0.19.0
|
||||||
|
|
||||||
- Added Patreon OAuth2 provider ([#3323](https://github.com/pocketbase/pocketbase/pull/3323); thanks @ghostdevv).
|
- Added Patreon OAuth2 provider ([#3323](https://github.com/pocketbase/pocketbase/pull/3323); thanks @ghostdevv).
|
||||||
|
|
|
@ -78,8 +78,7 @@ func (m *excerptModifier) Modify(value any) (any, error) {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
var isNotEmpty bool
|
var hasPrevSpace bool
|
||||||
var needSpace bool
|
|
||||||
|
|
||||||
// for all node types and more details check
|
// for all node types and more details check
|
||||||
// https://pkg.go.dev/golang.org/x/net/html#Parse
|
// https://pkg.go.dev/golang.org/x/net/html#Parse
|
||||||
|
@ -87,37 +86,47 @@ func (m *excerptModifier) Modify(value any) (any, error) {
|
||||||
stripTags = func(n *html.Node) {
|
stripTags = func(n *html.Node) {
|
||||||
switch n.Type {
|
switch n.Type {
|
||||||
case html.TextNode:
|
case html.TextNode:
|
||||||
if txt := strings.TrimSpace(whitespaceRegex.ReplaceAllString(n.Data, " ")); txt != "" {
|
// collapse multiple spaces into one
|
||||||
if isNotEmpty && needSpace {
|
txt := whitespaceRegex.ReplaceAllString(n.Data, " ")
|
||||||
needSpace = false
|
|
||||||
builder.WriteString(" ")
|
if hasPrevSpace {
|
||||||
|
txt = strings.TrimLeft(txt, " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if txt != "" {
|
||||||
|
hasPrevSpace = strings.HasSuffix(txt, " ")
|
||||||
|
|
||||||
builder.WriteString(txt)
|
builder.WriteString(txt)
|
||||||
|
|
||||||
if !isNotEmpty {
|
|
||||||
isNotEmpty = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case html.ElementNode:
|
|
||||||
if !needSpace && !list.ExistInSlice(n.Data, inlineTags) {
|
|
||||||
needSpace = true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if builder.Len() > m.max {
|
// excerpt max has been reached => no need to further iterate
|
||||||
|
// (+2 for the extra whitespace suffix/prefix that will be trimmed later)
|
||||||
|
if builder.Len() > m.max+2 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, excludeTags) {
|
if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, excludeTags) {
|
||||||
|
isBlock := c.Type == html.ElementNode && !list.ExistInSlice(c.Data, inlineTags)
|
||||||
|
|
||||||
|
if isBlock && !hasPrevSpace {
|
||||||
|
builder.WriteString(" ")
|
||||||
|
hasPrevSpace = true
|
||||||
|
}
|
||||||
|
|
||||||
stripTags(c)
|
stripTags(c)
|
||||||
|
|
||||||
|
if isBlock && !hasPrevSpace {
|
||||||
|
builder.WriteString(" ")
|
||||||
|
hasPrevSpace = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stripTags(doc)
|
stripTags(doc)
|
||||||
|
|
||||||
result := builder.String()
|
result := strings.TrimSpace(builder.String())
|
||||||
|
|
||||||
if len(result) > m.max {
|
if len(result) > m.max {
|
||||||
result = strings.TrimSpace(result[:m.max])
|
result = strings.TrimSpace(result[:m.max])
|
||||||
|
|
|
@ -84,11 +84,10 @@ func TestNewExcerptModifier(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestExcerptModifierModify(t *testing.T) {
|
func TestExcerptModifierModify(t *testing.T) {
|
||||||
// plain text value: "Hello t est12 3 word"
|
|
||||||
html := ` <script>var a = 123;</script> <p>Hello</p><div id="test_id">t est<b>12
|
html := ` <script>var a = 123;</script> <p>Hello</p><div id="test_id">t est<b>12
|
||||||
3</b></div> <h1>word </h1> `
|
3</b><span>456</span></div><span>word <b>7</b> 89<span>!<b>?</b><b> a </b><b>b </b>c</span>#<h1>title</h1>`
|
||||||
|
|
||||||
plainText := "Hello t est12 3 word"
|
plainText := "Hello t est12 3456 word 7 89!? a b c# title"
|
||||||
|
|
||||||
scenarios := []struct {
|
scenarios := []struct {
|
||||||
name string
|
name string
|
||||||
|
|
|
@ -21,6 +21,8 @@ const eof = rune(0)
|
||||||
// DefaultSeparators is a list with the default token separator characters.
|
// DefaultSeparators is a list with the default token separator characters.
|
||||||
var DefaultSeparators = []rune{','}
|
var DefaultSeparators = []rune{','}
|
||||||
|
|
||||||
|
var whitespaceChars = []rune{'\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0}
|
||||||
|
|
||||||
// NewFromString creates new Tokenizer from the provided string.
|
// NewFromString creates new Tokenizer from the provided string.
|
||||||
func NewFromString(str string) *Tokenizer {
|
func NewFromString(str string) *Tokenizer {
|
||||||
return New(strings.NewReader(str))
|
return New(strings.NewReader(str))
|
||||||
|
@ -33,12 +35,11 @@ func NewFromBytes(b []byte) *Tokenizer {
|
||||||
|
|
||||||
// New creates new Tokenizer from the provided reader with DefaultSeparators.
|
// New creates new Tokenizer from the provided reader with DefaultSeparators.
|
||||||
func New(r io.Reader) *Tokenizer {
|
func New(r io.Reader) *Tokenizer {
|
||||||
return &Tokenizer{
|
t := &Tokenizer{r: bufio.NewReader(r)}
|
||||||
r: bufio.NewReader(r),
|
|
||||||
separators: DefaultSeparators,
|
t.Separators(DefaultSeparators...)
|
||||||
keepSeparator: false,
|
|
||||||
ignoreParenthesis: false,
|
return t
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokenizer defines a struct that parses a reader into tokens while
|
// Tokenizer defines a struct that parses a reader into tokens while
|
||||||
|
@ -46,14 +47,18 @@ func New(r io.Reader) *Tokenizer {
|
||||||
type Tokenizer struct {
|
type Tokenizer struct {
|
||||||
r *bufio.Reader
|
r *bufio.Reader
|
||||||
|
|
||||||
|
trimCutset string
|
||||||
separators []rune
|
separators []rune
|
||||||
keepSeparator bool
|
keepSeparator bool
|
||||||
|
keepEmptyTokens bool
|
||||||
ignoreParenthesis bool
|
ignoreParenthesis bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Separators defines the provided separators of the current Tokenizer.
|
// Separators defines the provided separators of the current Tokenizer.
|
||||||
func (t *Tokenizer) Separators(separators ...rune) {
|
func (t *Tokenizer) Separators(separators ...rune) {
|
||||||
t.separators = separators
|
t.separators = separators
|
||||||
|
|
||||||
|
t.rebuildTrimCutset()
|
||||||
}
|
}
|
||||||
|
|
||||||
// KeepSeparator defines whether to keep the separator rune as part
|
// KeepSeparator defines whether to keep the separator rune as part
|
||||||
|
@ -62,35 +67,37 @@ func (t *Tokenizer) KeepSeparator(state bool) {
|
||||||
t.keepSeparator = state
|
t.keepSeparator = state
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// KeepEmptyTokens defines whether to keep empty tokens on Scan() (defaults to false).
|
||||||
|
func (t *Tokenizer) KeepEmptyTokens(state bool) {
|
||||||
|
t.keepEmptyTokens = state
|
||||||
|
}
|
||||||
|
|
||||||
// IgnoreParenthesis defines whether to ignore the parenthesis boundaries
|
// IgnoreParenthesis defines whether to ignore the parenthesis boundaries
|
||||||
// and to treat the '(' and ')' as regular characters.
|
// and to treat the '(' and ')' as regular characters.
|
||||||
func (t *Tokenizer) IgnoreParenthesis(state bool) {
|
func (t *Tokenizer) IgnoreParenthesis(state bool) {
|
||||||
t.ignoreParenthesis = state
|
t.ignoreParenthesis = state
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan reads and returns the next available token from the Tokenizer's buffer (trimmed).
|
// Scan reads and returns the next available token from the Tokenizer's buffer (trimmed!).
|
||||||
|
//
|
||||||
|
// Empty tokens are skipped if t.keepEmptyTokens is not set (which is the default).
|
||||||
//
|
//
|
||||||
// Returns [io.EOF] error when there are no more tokens to scan.
|
// Returns [io.EOF] error when there are no more tokens to scan.
|
||||||
func (t *Tokenizer) Scan() (string, error) {
|
func (t *Tokenizer) Scan() (string, error) {
|
||||||
ch := t.read()
|
ch := t.read()
|
||||||
|
|
||||||
if ch == eof {
|
if ch == eof {
|
||||||
return "", io.EOF
|
return "", io.EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
if isWhitespaceRune(ch) {
|
|
||||||
t.readWhiteSpaces()
|
|
||||||
} else {
|
|
||||||
t.unread()
|
t.unread()
|
||||||
}
|
|
||||||
|
|
||||||
token, err := t.readToken()
|
token, err := t.readToken()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
// read all remaining whitespaces
|
if !t.keepEmptyTokens && token == "" {
|
||||||
t.readWhiteSpaces()
|
return t.Scan()
|
||||||
|
}
|
||||||
|
|
||||||
return token, err
|
return token, err
|
||||||
}
|
}
|
||||||
|
@ -129,12 +136,12 @@ func (t *Tokenizer) readToken() (string, error) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if !isEscapeRune(prevCh) {
|
if !t.isEscapeRune(prevCh) {
|
||||||
if !t.ignoreParenthesis && ch == '(' && quoteCh == eof {
|
if !t.ignoreParenthesis && ch == '(' && quoteCh == eof {
|
||||||
parenthesis++ // opening parenthesis
|
parenthesis++ // opening parenthesis
|
||||||
} else if !t.ignoreParenthesis && ch == ')' && parenthesis > 0 && quoteCh == eof {
|
} else if !t.ignoreParenthesis && ch == ')' && parenthesis > 0 && quoteCh == eof {
|
||||||
parenthesis-- // closing parenthesis
|
parenthesis-- // closing parenthesis
|
||||||
} else if isQuoteRune(ch) {
|
} else if t.isQuoteRune(ch) {
|
||||||
if quoteCh == ch {
|
if quoteCh == ch {
|
||||||
quoteCh = eof // closing quote
|
quoteCh = eof // closing quote
|
||||||
} else if quoteCh == eof {
|
} else if quoteCh == eof {
|
||||||
|
@ -158,7 +165,7 @@ func (t *Tokenizer) readToken() (string, error) {
|
||||||
return "", fmt.Errorf("unbalanced parenthesis or quoted expression: %q", buf.String())
|
return "", fmt.Errorf("unbalanced parenthesis or quoted expression: %q", buf.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
return buf.String(), nil
|
return strings.Trim(buf.String(), t.trimCutset), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// readWhiteSpaces consumes all contiguous whitespace runes.
|
// readWhiteSpaces consumes all contiguous whitespace runes.
|
||||||
|
@ -170,7 +177,7 @@ func (t *Tokenizer) readWhiteSpaces() {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if !t.isSeperatorRune(ch) {
|
if !t.isWhitespaceRune(ch) {
|
||||||
t.unread()
|
t.unread()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -193,6 +200,20 @@ func (t *Tokenizer) unread() error {
|
||||||
return t.r.UnreadRune()
|
return t.r.UnreadRune()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rebuildTrimCutset rebuilds the tokenizer trimCutset based on its separator runes.
|
||||||
|
func (t *Tokenizer) rebuildTrimCutset() {
|
||||||
|
var cutset strings.Builder
|
||||||
|
|
||||||
|
for _, w := range whitespaceChars {
|
||||||
|
if t.isSeperatorRune(w) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cutset.WriteRune(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.trimCutset = cutset.String()
|
||||||
|
}
|
||||||
|
|
||||||
// isSeperatorRune checks if a rune is a token part separator.
|
// isSeperatorRune checks if a rune is a token part separator.
|
||||||
func (t *Tokenizer) isSeperatorRune(ch rune) bool {
|
func (t *Tokenizer) isSeperatorRune(ch rune) bool {
|
||||||
for _, r := range t.separators {
|
for _, r := range t.separators {
|
||||||
|
@ -204,17 +225,23 @@ func (t *Tokenizer) isSeperatorRune(ch rune) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// isWhitespaceRune checks if a rune is a space, tab, or newline.
|
// isWhitespaceRune checks if a rune is a space character (eg. space, tab, new line).
|
||||||
func isWhitespaceRune(ch rune) bool {
|
func (t *Tokenizer) isWhitespaceRune(ch rune) bool {
|
||||||
return ch == ' ' || ch == '\t' || ch == '\n'
|
for _, c := range whitespaceChars {
|
||||||
|
if c == ch {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// isQuoteRune checks if a rune is a quote.
|
// isQuoteRune checks if a rune is a quote.
|
||||||
func isQuoteRune(ch rune) bool {
|
func (t *Tokenizer) isQuoteRune(ch rune) bool {
|
||||||
return ch == '\'' || ch == '"' || ch == '`'
|
return ch == '\'' || ch == '"' || ch == '`'
|
||||||
}
|
}
|
||||||
|
|
||||||
// isEscapeRune checks if a rune is an escape character.
|
// isEscapeRune checks if a rune is an escape character.
|
||||||
func isEscapeRune(ch rune) bool {
|
func (t *Tokenizer) isEscapeRune(ch rune) bool {
|
||||||
return ch == '\\'
|
return ch == '\\'
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,6 +95,7 @@ func TestScanAll(t *testing.T) {
|
||||||
content string
|
content string
|
||||||
separators []rune
|
separators []rune
|
||||||
keepSeparator bool
|
keepSeparator bool
|
||||||
|
keepEmptyTokens bool
|
||||||
ignoreParenthesis bool
|
ignoreParenthesis bool
|
||||||
expectError bool
|
expectError bool
|
||||||
expectTokens []string
|
expectTokens []string
|
||||||
|
@ -104,6 +105,7 @@ func TestScanAll(t *testing.T) {
|
||||||
content: "",
|
content: "",
|
||||||
separators: DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: nil,
|
expectTokens: nil,
|
||||||
|
@ -113,6 +115,7 @@ func TestScanAll(t *testing.T) {
|
||||||
content: `(a,b() c`,
|
content: `(a,b() c`,
|
||||||
separators: DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: true,
|
expectError: true,
|
||||||
expectTokens: []string{},
|
expectTokens: []string{},
|
||||||
|
@ -122,6 +125,7 @@ func TestScanAll(t *testing.T) {
|
||||||
content: `'asd"`,
|
content: `'asd"`,
|
||||||
separators: DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: true,
|
expectError: true,
|
||||||
expectTokens: []string{},
|
expectTokens: []string{},
|
||||||
|
@ -131,15 +135,18 @@ func TestScanAll(t *testing.T) {
|
||||||
content: `a, b, c, d, e 123, "abc"`,
|
content: `a, b, c, d, e 123, "abc"`,
|
||||||
separators: nil,
|
separators: nil,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: []string{`a, b, c, d, e 123, "abc"`},
|
expectTokens: []string{`a, b, c, d, e 123, "abc"`},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "default separators",
|
name: "default separators",
|
||||||
content: `a, b, c, d e, "a,b, c ", (123, 456)`,
|
content: `a, b , c , d e , "a,b, c " , ,, , (123, 456)
|
||||||
|
`,
|
||||||
separators: DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: []string{
|
expectTokens: []string{
|
||||||
|
@ -152,70 +159,49 @@ func TestScanAll(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "default separators (with preserve)",
|
name: "keep separators",
|
||||||
content: `a, b, c, d e, "a,b, c ", (123, 456)`,
|
content: `a, b, c, d e, "a,b, c ", (123, 456)`,
|
||||||
separators: DefaultSeparators,
|
separators: []rune{',', ' '}, // the space should be removed from the cutset
|
||||||
keepSeparator: true,
|
keepSeparator: true,
|
||||||
|
keepEmptyTokens: true,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: []string{
|
expectTokens: []string{
|
||||||
"a,",
|
"a,",
|
||||||
|
" ",
|
||||||
"b,",
|
"b,",
|
||||||
|
" ",
|
||||||
"c,",
|
"c,",
|
||||||
"d e,",
|
" ",
|
||||||
|
"d ",
|
||||||
|
" ",
|
||||||
|
"e,",
|
||||||
|
" ",
|
||||||
`"a,b, c ",`,
|
`"a,b, c ",`,
|
||||||
`(123, 456)`,
|
`(123, 456)`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "custom separators",
|
name: "custom separators",
|
||||||
content: ` a , 123.456, b, c d, (
|
content: `a | b c d &(e + f) & "g & h" & & &`,
|
||||||
test (a,b,c) " 123 "
|
separators: []rune{'|', '&'},
|
||||||
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
|
||||||
separators: []rune{',', ' ', '\t', '\n'},
|
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: false,
|
ignoreParenthesis: false,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: []string{
|
expectTokens: []string{
|
||||||
"a",
|
"a",
|
||||||
"123.456",
|
"b c d",
|
||||||
"b",
|
"(e + f)",
|
||||||
"c",
|
`"g & h"`,
|
||||||
"d",
|
|
||||||
"(\n\t\t\t\ttest (a,b,c) \" 123 \"\n\t\t\t)",
|
|
||||||
`"(abc d"`,
|
|
||||||
`"abc) d"`,
|
|
||||||
`"(abc) d \" "`,
|
|
||||||
`'abc "'`,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "custom separators (with preserve)",
|
|
||||||
content: ` a , 123.456, b, c d, (
|
|
||||||
test (a,b,c) " 123 "
|
|
||||||
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
|
||||||
separators: []rune{',', ' ', '\t', '\n'},
|
|
||||||
keepSeparator: true,
|
|
||||||
ignoreParenthesis: false,
|
|
||||||
expectError: false,
|
|
||||||
expectTokens: []string{
|
|
||||||
"a ",
|
|
||||||
"123.456,",
|
|
||||||
"b,",
|
|
||||||
"c ",
|
|
||||||
"d,",
|
|
||||||
"(\n\t\t\t\ttest (a,b,c) \" 123 \"\n\t\t\t),",
|
|
||||||
`"(abc d",`,
|
|
||||||
`"abc) d",`,
|
|
||||||
`"(abc) d \" " `,
|
|
||||||
`'abc "'`,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "ignoring parenthesis",
|
name: "ignoring parenthesis",
|
||||||
content: `a, b, (c,d)`,
|
content: `a, b, (c,d)`,
|
||||||
separators: []rune{','},
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: false,
|
||||||
ignoreParenthesis: true,
|
ignoreParenthesis: true,
|
||||||
expectError: false,
|
expectError: false,
|
||||||
expectTokens: []string{
|
expectTokens: []string{
|
||||||
|
@ -225,6 +211,26 @@ func TestScanAll(t *testing.T) {
|
||||||
"d)",
|
"d)",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "keep empty tokens",
|
||||||
|
content: `a, b, (c, d), ,, , e, , f`,
|
||||||
|
separators: DefaultSeparators,
|
||||||
|
keepSeparator: false,
|
||||||
|
keepEmptyTokens: true,
|
||||||
|
ignoreParenthesis: false,
|
||||||
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
|
"a",
|
||||||
|
"b",
|
||||||
|
"(c, d)",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"e",
|
||||||
|
"",
|
||||||
|
"f",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range scenarios {
|
for _, s := range scenarios {
|
||||||
|
@ -233,6 +239,7 @@ func TestScanAll(t *testing.T) {
|
||||||
|
|
||||||
tk.Separators(s.separators...)
|
tk.Separators(s.separators...)
|
||||||
tk.KeepSeparator(s.keepSeparator)
|
tk.KeepSeparator(s.keepSeparator)
|
||||||
|
tk.KeepEmptyTokens(s.keepEmptyTokens)
|
||||||
tk.IgnoreParenthesis(s.ignoreParenthesis)
|
tk.IgnoreParenthesis(s.ignoreParenthesis)
|
||||||
|
|
||||||
tokens, err := tk.ScanAll()
|
tokens, err := tk.ScanAll()
|
||||||
|
@ -255,9 +262,42 @@ func TestScanAll(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !exists {
|
if !exists {
|
||||||
t.Fatalf("Unexpected token %s", tok)
|
t.Fatalf("Unexpected token %q", tok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTrimCutset(t *testing.T) {
|
||||||
|
scenarios := []struct {
|
||||||
|
name string
|
||||||
|
separators []rune
|
||||||
|
expectedCutset string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"default factory separators",
|
||||||
|
nil,
|
||||||
|
"\t\n\v\f\r \u0085\u00a0",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"custom separators",
|
||||||
|
[]rune{'\t', ' ', '\r', ','},
|
||||||
|
"\n\v\f\u0085\u00a0",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range scenarios {
|
||||||
|
t.Run(s.name, func(t *testing.T) {
|
||||||
|
tk := NewFromString("")
|
||||||
|
|
||||||
|
if len(s.separators) > 0 {
|
||||||
|
tk.Separators(s.separators...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tk.trimCutset != s.expectedCutset {
|
||||||
|
t.Fatalf("Expected cutset %q, got %q", s.expectedCutset, tk.trimCutset)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue