gotosocial/vendor/github.com/go-openapi/swag/split.go

509 lines
11 KiB
Go
Raw Normal View History

// Copyright 2015 go-swagger maintainers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package swag
import (
"bytes"
"sync"
"unicode"
"unicode/utf8"
)
type (
splitter struct {
initialisms []string
initialismsRunes [][]rune
initialismsUpperCased [][]rune // initialisms cached in their trimmed, upper-cased version
postSplitInitialismCheck bool
}
splitterOption func(*splitter)
initialismMatch struct {
body []rune
start, end int
complete bool
}
initialismMatches []initialismMatch
)
type (
// memory pools of temporary objects.
//
// These are used to recycle temporarily allocated objects
// and relieve the GC from undue pressure.
matchesPool struct {
*sync.Pool
}
buffersPool struct {
*sync.Pool
}
lexemsPool struct {
*sync.Pool
}
splittersPool struct {
*sync.Pool
}
)
var (
// poolOfMatches holds temporary slices for recycling during the initialism match process
poolOfMatches = matchesPool{
Pool: &sync.Pool{
New: func() any {
s := make(initialismMatches, 0, maxAllocMatches)
return &s
},
},
}
poolOfBuffers = buffersPool{
Pool: &sync.Pool{
New: func() any {
return new(bytes.Buffer)
},
},
}
poolOfLexems = lexemsPool{
Pool: &sync.Pool{
New: func() any {
s := make([]nameLexem, 0, maxAllocMatches)
return &s
},
},
}
poolOfSplitters = splittersPool{
Pool: &sync.Pool{
New: func() any {
s := newSplitter()
return &s
},
},
}
)
// nameReplaceTable finds a word representation for special characters.
func nameReplaceTable(r rune) (string, bool) {
switch r {
case '@':
return "At ", true
case '&':
return "And ", true
case '|':
return "Pipe ", true
case '$':
return "Dollar ", true
case '!':
return "Bang ", true
case '-':
return "", true
case '_':
return "", true
default:
return "", false
}
}
// split calls the splitter.
//
// Use newSplitter for more control and options
func split(str string) []string {
s := poolOfSplitters.BorrowSplitter()
lexems := s.split(str)
result := make([]string, 0, len(*lexems))
for _, lexem := range *lexems {
result = append(result, lexem.GetOriginal())
}
poolOfLexems.RedeemLexems(lexems)
poolOfSplitters.RedeemSplitter(s)
return result
}
func newSplitter(options ...splitterOption) splitter {
s := splitter{
postSplitInitialismCheck: false,
initialisms: initialisms,
initialismsRunes: initialismsRunes,
initialismsUpperCased: initialismsUpperCased,
}
for _, option := range options {
option(&s)
}
return s
}
// withPostSplitInitialismCheck allows to catch initialisms after main split process
func withPostSplitInitialismCheck(s *splitter) {
s.postSplitInitialismCheck = true
}
func (p matchesPool) BorrowMatches() *initialismMatches {
s := p.Get().(*initialismMatches)
*s = (*s)[:0] // reset slice, keep allocated capacity
return s
}
func (p buffersPool) BorrowBuffer(size int) *bytes.Buffer {
s := p.Get().(*bytes.Buffer)
s.Reset()
if s.Cap() < size {
s.Grow(size)
}
return s
}
func (p lexemsPool) BorrowLexems() *[]nameLexem {
s := p.Get().(*[]nameLexem)
*s = (*s)[:0] // reset slice, keep allocated capacity
return s
}
func (p splittersPool) BorrowSplitter(options ...splitterOption) *splitter {
s := p.Get().(*splitter)
s.postSplitInitialismCheck = false // reset options
for _, apply := range options {
apply(s)
}
return s
}
func (p matchesPool) RedeemMatches(s *initialismMatches) {
p.Put(s)
}
func (p buffersPool) RedeemBuffer(s *bytes.Buffer) {
p.Put(s)
}
func (p lexemsPool) RedeemLexems(s *[]nameLexem) {
p.Put(s)
}
func (p splittersPool) RedeemSplitter(s *splitter) {
p.Put(s)
}
func (m initialismMatch) isZero() bool {
return m.start == 0 && m.end == 0
}
func (s splitter) split(name string) *[]nameLexem {
nameRunes := []rune(name)
matches := s.gatherInitialismMatches(nameRunes)
if matches == nil {
return poolOfLexems.BorrowLexems()
}
return s.mapMatchesToNameLexems(nameRunes, matches)
}
func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches {
var matches *initialismMatches
for currentRunePosition, currentRune := range nameRunes {
// recycle these allocations as we loop over runes
// with such recycling, only 2 slices should be allocated per call
// instead of o(n).
newMatches := poolOfMatches.BorrowMatches()
// check current initialism matches
if matches != nil { // skip first iteration
for _, match := range *matches {
if keepCompleteMatch := match.complete; keepCompleteMatch {
*newMatches = append(*newMatches, match)
continue
}
// drop failed match
currentMatchRune := match.body[currentRunePosition-match.start]
if currentMatchRune != currentRune {
continue
}
// try to complete ongoing match
if currentRunePosition-match.start == len(match.body)-1 {
// we are close; the next step is to check the symbol ahead
// if it is a small letter, then it is not the end of match
// but beginning of the next word
if currentRunePosition < len(nameRunes)-1 {
nextRune := nameRunes[currentRunePosition+1]
if newWord := unicode.IsLower(nextRune); newWord {
// oh ok, it was the start of a new word
continue
}
}
match.complete = true
match.end = currentRunePosition
}
*newMatches = append(*newMatches, match)
}
}
// check for new initialism matches
for i := range s.initialisms {
initialismRunes := s.initialismsRunes[i]
if initialismRunes[0] == currentRune {
*newMatches = append(*newMatches, initialismMatch{
start: currentRunePosition,
body: initialismRunes,
complete: false,
})
}
}
if matches != nil {
poolOfMatches.RedeemMatches(matches)
}
matches = newMatches
}
// up to the caller to redeem this last slice
return matches
}
func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem {
nameLexems := poolOfLexems.BorrowLexems()
var lastAcceptedMatch initialismMatch
for _, match := range *matches {
if !match.complete {
continue
}
if firstMatch := lastAcceptedMatch.isZero(); firstMatch {
s.appendBrokenDownCasualString(nameLexems, nameRunes[:match.start])
*nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))
lastAcceptedMatch = match
continue
}
if overlappedMatch := match.start <= lastAcceptedMatch.end; overlappedMatch {
continue
}
middle := nameRunes[lastAcceptedMatch.end+1 : match.start]
s.appendBrokenDownCasualString(nameLexems, middle)
*nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))
lastAcceptedMatch = match
}
// we have not found any accepted matches
if lastAcceptedMatch.isZero() {
*nameLexems = (*nameLexems)[:0]
s.appendBrokenDownCasualString(nameLexems, nameRunes)
} else if lastAcceptedMatch.end+1 != len(nameRunes) {
rest := nameRunes[lastAcceptedMatch.end+1:]
s.appendBrokenDownCasualString(nameLexems, rest)
}
poolOfMatches.RedeemMatches(matches)
return nameLexems
}
func (s splitter) breakInitialism(original string) nameLexem {
return newInitialismNameLexem(original, original)
}
func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) {
currentSegment := poolOfBuffers.BorrowBuffer(len(str)) // unlike strings.Builder, bytes.Buffer initial storage can reused
defer func() {
poolOfBuffers.RedeemBuffer(currentSegment)
}()
addCasualNameLexem := func(original string) {
*segments = append(*segments, newCasualNameLexem(original))
}
addInitialismNameLexem := func(original, match string) {
*segments = append(*segments, newInitialismNameLexem(original, match))
}
var addNameLexem func(string)
if s.postSplitInitialismCheck {
addNameLexem = func(original string) {
for i := range s.initialisms {
if isEqualFoldIgnoreSpace(s.initialismsUpperCased[i], original) {
addInitialismNameLexem(original, s.initialisms[i])
return
}
}
addCasualNameLexem(original)
}
} else {
addNameLexem = addCasualNameLexem
}
for _, rn := range str {
if replace, found := nameReplaceTable(rn); found {
if currentSegment.Len() > 0 {
addNameLexem(currentSegment.String())
currentSegment.Reset()
}
if replace != "" {
addNameLexem(replace)
}
continue
}
if !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc) {
if currentSegment.Len() > 0 {
addNameLexem(currentSegment.String())
currentSegment.Reset()
}
continue
}
if unicode.IsUpper(rn) {
if currentSegment.Len() > 0 {
addNameLexem(currentSegment.String())
}
currentSegment.Reset()
}
currentSegment.WriteRune(rn)
}
if currentSegment.Len() > 0 {
addNameLexem(currentSegment.String())
}
}
// isEqualFoldIgnoreSpace is the same as strings.EqualFold, but
// it ignores leading and trailing blank spaces in the compared
// string.
//
// base is assumed to be composed of upper-cased runes, and be already
// trimmed.
//
// This code is heavily inspired from strings.EqualFold.
func isEqualFoldIgnoreSpace(base []rune, str string) bool {
var i, baseIndex int
// equivalent to b := []byte(str), but without data copy
b := hackStringBytes(str)
for i < len(b) {
if c := b[i]; c < utf8.RuneSelf {
// fast path for ASCII
if c != ' ' && c != '\t' {
break
}
i++
continue
}
// unicode case
r, size := utf8.DecodeRune(b[i:])
if !unicode.IsSpace(r) {
break
}
i += size
}
if i >= len(b) {
return len(base) == 0
}
for _, baseRune := range base {
if i >= len(b) {
break
}
if c := b[i]; c < utf8.RuneSelf {
// single byte rune case (ASCII)
if baseRune >= utf8.RuneSelf {
return false
}
baseChar := byte(baseRune)
if c != baseChar &&
!('a' <= c && c <= 'z' && c-'a'+'A' == baseChar) {
return false
}
baseIndex++
i++
continue
}
// unicode case
r, size := utf8.DecodeRune(b[i:])
if unicode.ToUpper(r) != baseRune {
return false
}
baseIndex++
i += size
}
if baseIndex != len(base) {
return false
}
// all passed: now we should only have blanks
for i < len(b) {
if c := b[i]; c < utf8.RuneSelf {
// fast path for ASCII
if c != ' ' && c != '\t' {
return false
}
i++
continue
}
// unicode case
r, size := utf8.DecodeRune(b[i:])
if !unicode.IsSpace(r) {
return false
}
i += size
}
return true
}