From bc998d034f45d1828a8663b2eed928faf22a7d01 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 14 Sep 2017 17:11:35 +0000 Subject: libgo: update to go1.9 Reviewed-on: https://go-review.googlesource.com/63753 From-SVN: r252767 --- libgo/go/regexp/all_test.go | 10 ++++++++- libgo/go/regexp/exec.go | 6 +++-- libgo/go/regexp/exec_test.go | 29 ++++++++++++++++++++++++ libgo/go/regexp/onepass.go | 42 +++++++++++++++++------------------ libgo/go/regexp/onepass_test.go | 22 ++++++++++++++++++ libgo/go/regexp/regexp.go | 49 ++++++++++++++++++++++++++++++++--------- libgo/go/regexp/syntax/parse.go | 4 ++-- 7 files changed, 125 insertions(+), 37 deletions(-) (limited to 'libgo/go/regexp') diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index beb46e70995..28fe20c15d8 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -9,6 +9,7 @@ import ( "regexp/syntax" "strings" "testing" + "unicode/utf8" ) var goodRe = []string{ @@ -354,6 +355,7 @@ type MetaTest struct { var metaTests = []MetaTest{ {``, ``, ``, true}, {`foo`, `foo`, `foo`, true}, + {`日本語+`, `日本語\+`, `日本語`, false}, {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, @@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) { var sink string func BenchmarkQuoteMetaAll(b *testing.B) { - s := string(specialBytes) + specials := make([]byte, 0) + for i := byte(0); i < utf8.RuneSelf; i++ { + if special(i) { + specials = append(specials, i) + } + } + s := string(specials) b.SetBytes(int64(len(s))) b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index 977619cb28a..f8fe7b5deff 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -309,12 +309,14 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty // onepass runs the machine over the input starting at pos. // It reports whether a match was found. // If so, m.matchcap holds the submatch information. -func (m *machine) onepass(i input, pos int) bool { +// ncap is the number of captures. +func (m *machine) onepass(i input, pos, ncap int) bool { startCond := m.re.cond if startCond == ^syntax.EmptyOp(0) { // impossible return false } m.matched = false + m.matchcap = m.matchcap[:ncap] for i := range m.matchcap { m.matchcap[i] = -1 } @@ -428,7 +430,7 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i size = len(s) } if m.op != notOnePass { - if !m.onepass(i, pos) { + if !m.onepass(i, pos, ncap) { re.put(m) return nil } diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 766394de6ee..5f8e747b17b 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -681,6 +681,35 @@ func BenchmarkMatch(b *testing.B) { } } +func BenchmarkMatch_onepass_regex(b *testing.B) { + isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race") + r := MustCompile(`(?s)\A.*\z`) + if r.get().op == notOnePass { + b.Fatalf("want onepass regex, but %q is not onepass", r) + } + for _, size := range benchSizes { + if isRaceBuilder && size.n > 1<<10 { + continue + } + t := makeText(size.n) + bs := make([][]byte, len(t)) + for i, s := range t { + bs[i] = []byte{s} + } + b.Run(size.name, func(b *testing.B) { + b.SetBytes(int64(size.n)) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, byts := range bs { + if !r.Match(byts) { + b.Fatal("not match!") + } + } + } + }) + } +} + var benchData = []struct{ name, re string }{ {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"}, diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go index 1b0564c3fd0..3ceb4619058 100644 --- a/libgo/go/regexp/onepass.go +++ b/libgo/go/regexp/onepass.go @@ -222,9 +222,10 @@ func onePassCopy(prog *syntax.Prog) *onePassProg { p := &onePassProg{ Start: prog.Start, NumCap: prog.NumCap, + Inst: make([]onePassInst, len(prog.Inst)), } - for _, inst := range prog.Inst { - p.Inst = append(p.Inst, onePassInst{Inst: inst}) + for i, inst := range prog.Inst { + p.Inst[i] = onePassInst{Inst: inst} } // rewrites one or more common Prog constructs that enable some otherwise @@ -304,13 +305,13 @@ func makeOnePass(p *onePassProg) *onePassProg { var ( instQueue = newQueue(len(p.Inst)) visitQueue = newQueue(len(p.Inst)) - check func(uint32, map[uint32]bool) bool + check func(uint32, []bool) bool onePassRunes = make([][]rune, len(p.Inst)) ) // check that paths from Alt instructions are unambiguous, and rebuild the new // program as a onepass program - check = func(pc uint32, m map[uint32]bool) (ok bool) { + check = func(pc uint32, m []bool) (ok bool) { ok = true inst := &p.Inst[pc] if visitQueue.contains(pc) { @@ -349,21 +350,20 @@ func makeOnePass(p *onePassProg) *onePassProg { m[pc] = m[inst.Out] // pass matching runes back through these no-ops. onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out } case syntax.InstEmptyWidth: ok = check(inst.Out, m) m[pc] = m[inst.Out] onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out } case syntax.InstMatch, syntax.InstFail: m[pc] = inst.Op == syntax.InstMatch - break case syntax.InstRune: m[pc] = false if len(inst.Next) > 0 { @@ -387,9 +387,9 @@ func makeOnePass(p *onePassProg) *onePassProg { runes = append(runes, inst.Rune...) } onePassRunes[pc] = runes - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out } inst.Op = syntax.InstRune case syntax.InstRune1: @@ -411,9 +411,9 @@ func makeOnePass(p *onePassProg) *onePassProg { runes = append(runes, inst.Rune[0], inst.Rune[0]) } onePassRunes[pc] = runes - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out } inst.Op = syntax.InstRune case syntax.InstRuneAny: @@ -431,9 +431,9 @@ func makeOnePass(p *onePassProg) *onePassProg { } instQueue.insert(inst.Out) onePassRunes[pc] = append([]rune{}, anyRuneNotNL...) - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) + inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) + for i := range inst.Next { + inst.Next[i] = inst.Out } } return @@ -441,7 +441,7 @@ func makeOnePass(p *onePassProg) *onePassProg { instQueue.clear() instQueue.insert(uint32(p.Start)) - m := make(map[uint32]bool, len(p.Inst)) + m := make([]bool, len(p.Inst)) for !instQueue.empty() { visitQueue.clear() pc := instQueue.next() diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go index f4e336c43ba..b1caa445150 100644 --- a/libgo/go/regexp/onepass_test.go +++ b/libgo/go/regexp/onepass_test.go @@ -7,6 +7,7 @@ package regexp import ( "reflect" "regexp/syntax" + "strings" "testing" ) @@ -173,6 +174,7 @@ var onePassTests = []struct { {`^.bc(d|e)*$`, onePass}, {`^(?:(?:aa)|.)$`, notOnePass}, {`^(?:(?:a{1,2}){1,2})$`, notOnePass}, + {`^l` + strings.Repeat("o", 2<<8) + `ng$`, onePass}, } func TestCompileOnePass(t *testing.T) { @@ -223,3 +225,23 @@ func TestRunOnePass(t *testing.T) { } } } + +func BenchmarkCompileOnepass(b *testing.B) { + for _, test := range onePassTests { + if test.onePass == notOnePass { + continue + } + name := test.re + if len(name) > 20 { + name = name[:20] + "..." + } + b.Run(name, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if _, err := Compile(test.re); err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index 01093d4bd0d..b1af23e8504 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -76,7 +76,8 @@ import ( ) // Regexp is the representation of a compiled regular expression. -// A Regexp is safe for concurrent use by multiple goroutines. +// A Regexp is safe for concurrent use by multiple goroutines, +// except for configuration methods, such as Longest. type Regexp struct { // read-only after Compile regexpRO @@ -159,6 +160,8 @@ func CompilePOSIX(expr string) (*Regexp, error) { // That is, when matching against text, the regexp returns a match that // begins as early as possible in the input (leftmost), and among those // it chooses a match that is as long as possible. +// This method modifies the Regexp and may not be called concurrently +// with any other methods. func (re *Regexp) Longest() { re.longest = true } @@ -313,11 +316,19 @@ func (i *inputString) index(re *Regexp, pos int) int { func (i *inputString) context(pos int) syntax.EmptyOp { r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) + } } - if pos < len(i.str) { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRuneInString(i.str[pos:]) + } } return syntax.EmptyOpContext(r1, r2) } @@ -352,11 +363,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int { func (i *inputBytes) context(pos int) syntax.EmptyOp { r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRune(i.str[:pos]) + } } - if pos < len(i.str) { - r2, _ = utf8.DecodeRune(i.str[pos:]) + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRune(i.str[pos:]) + } } return syntax.EmptyOpContext(r1, r2) } @@ -590,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { }) } -var specialBytes = []byte(`\.+*?()|[]{}^$`) +// Bitmap used by func special to check whether a character needs to be escaped. +var specialBytes [16]byte +// special reports whether byte b needs to be escaped by QuoteMeta. func special(b byte) bool { - return bytes.IndexByte(specialBytes, b) >= 0 + return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 +} + +func init() { + for _, b := range []byte(`\.+*?()|[]{}^$`) { + specialBytes[b%16] |= 1 << (b / 16) + } } // QuoteMeta returns a string that quotes all regular expression metacharacters diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index 7b8be55ddb1..8c6d43a7063 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -381,7 +381,7 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { } } if op == OpAlternate { - re.Sub = p.factor(re.Sub, re.Flags) + re.Sub = p.factor(re.Sub) if len(re.Sub) == 1 { old := re re = re.Sub[0] @@ -402,7 +402,7 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { // which simplifies by character class introduction to // A(B[CD]|EF)|BC[XY] // -func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { +func (p *parser) factor(sub []*Regexp) []*Regexp { if len(sub) < 2 { return sub } -- cgit v1.2.3