summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2017-09-14 17:11:35 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2017-09-14 17:11:35 +0000
commitbc998d034f45d1828a8663b2eed928faf22a7d01 (patch)
tree8d262a22ca7318f4bcd64269fe8fe9e45bcf8d0f /libgo/go/regexp
parenta41a6142df74219f596e612d3a7775f68ca6e96f (diff)
libgo: update to go1.9
Reviewed-on: https://go-review.googlesource.com/63753 From-SVN: r252767
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go10
-rw-r--r--libgo/go/regexp/exec.go6
-rw-r--r--libgo/go/regexp/exec_test.go29
-rw-r--r--libgo/go/regexp/onepass.go42
-rw-r--r--libgo/go/regexp/onepass_test.go22
-rw-r--r--libgo/go/regexp/regexp.go49
-rw-r--r--libgo/go/regexp/syntax/parse.go4
7 files changed, 125 insertions, 37 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index beb46e70995..28fe20c15d8 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -9,6 +9,7 @@ import (
"regexp/syntax"
"strings"
"testing"
+ "unicode/utf8"
)
var goodRe = []string{
@@ -354,6 +355,7 @@ type MetaTest struct {
var metaTests = []MetaTest{
{``, ``, ``, true},
{`foo`, `foo`, `foo`, true},
+ {`日本語+`, `日本語\+`, `日本語`, false},
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
@@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) {
var sink string
func BenchmarkQuoteMetaAll(b *testing.B) {
- s := string(specialBytes)
+ specials := make([]byte, 0)
+ for i := byte(0); i < utf8.RuneSelf; i++ {
+ if special(i) {
+ specials = append(specials, i)
+ }
+ }
+ s := string(specials)
b.SetBytes(int64(len(s)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go
index 977619cb28a..f8fe7b5deff 100644
--- a/libgo/go/regexp/exec.go
+++ b/libgo/go/regexp/exec.go
@@ -309,12 +309,14 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty
// onepass runs the machine over the input starting at pos.
// It reports whether a match was found.
// If so, m.matchcap holds the submatch information.
-func (m *machine) onepass(i input, pos int) bool {
+// ncap is the number of captures.
+func (m *machine) onepass(i input, pos, ncap int) bool {
startCond := m.re.cond
if startCond == ^syntax.EmptyOp(0) { // impossible
return false
}
m.matched = false
+ m.matchcap = m.matchcap[:ncap]
for i := range m.matchcap {
m.matchcap[i] = -1
}
@@ -428,7 +430,7 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
size = len(s)
}
if m.op != notOnePass {
- if !m.onepass(i, pos) {
+ if !m.onepass(i, pos, ncap) {
re.put(m)
return nil
}
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 766394de6ee..5f8e747b17b 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -681,6 +681,35 @@ func BenchmarkMatch(b *testing.B) {
}
}
+func BenchmarkMatch_onepass_regex(b *testing.B) {
+ isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
+ r := MustCompile(`(?s)\A.*\z`)
+ if r.get().op == notOnePass {
+ b.Fatalf("want onepass regex, but %q is not onepass", r)
+ }
+ for _, size := range benchSizes {
+ if isRaceBuilder && size.n > 1<<10 {
+ continue
+ }
+ t := makeText(size.n)
+ bs := make([][]byte, len(t))
+ for i, s := range t {
+ bs[i] = []byte{s}
+ }
+ b.Run(size.name, func(b *testing.B) {
+ b.SetBytes(int64(size.n))
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ for _, byts := range bs {
+ if !r.Match(byts) {
+ b.Fatal("not match!")
+ }
+ }
+ }
+ })
+ }
+}
+
var benchData = []struct{ name, re string }{
{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 1b0564c3fd0..3ceb4619058 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -222,9 +222,10 @@ func onePassCopy(prog *syntax.Prog) *onePassProg {
p := &onePassProg{
Start: prog.Start,
NumCap: prog.NumCap,
+ Inst: make([]onePassInst, len(prog.Inst)),
}
- for _, inst := range prog.Inst {
- p.Inst = append(p.Inst, onePassInst{Inst: inst})
+ for i, inst := range prog.Inst {
+ p.Inst[i] = onePassInst{Inst: inst}
}
// rewrites one or more common Prog constructs that enable some otherwise
@@ -304,13 +305,13 @@ func makeOnePass(p *onePassProg) *onePassProg {
var (
instQueue = newQueue(len(p.Inst))
visitQueue = newQueue(len(p.Inst))
- check func(uint32, map[uint32]bool) bool
+ check func(uint32, []bool) bool
onePassRunes = make([][]rune, len(p.Inst))
)
// check that paths from Alt instructions are unambiguous, and rebuild the new
// program as a onepass program
- check = func(pc uint32, m map[uint32]bool) (ok bool) {
+ check = func(pc uint32, m []bool) (ok bool) {
ok = true
inst := &p.Inst[pc]
if visitQueue.contains(pc) {
@@ -349,21 +350,20 @@ func makeOnePass(p *onePassProg) *onePassProg {
m[pc] = m[inst.Out]
// pass matching runes back through these no-ops.
onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
+ inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
+ for i := range inst.Next {
+ inst.Next[i] = inst.Out
}
case syntax.InstEmptyWidth:
ok = check(inst.Out, m)
m[pc] = m[inst.Out]
onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
+ inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
+ for i := range inst.Next {
+ inst.Next[i] = inst.Out
}
case syntax.InstMatch, syntax.InstFail:
m[pc] = inst.Op == syntax.InstMatch
- break
case syntax.InstRune:
m[pc] = false
if len(inst.Next) > 0 {
@@ -387,9 +387,9 @@ func makeOnePass(p *onePassProg) *onePassProg {
runes = append(runes, inst.Rune...)
}
onePassRunes[pc] = runes
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
+ inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
+ for i := range inst.Next {
+ inst.Next[i] = inst.Out
}
inst.Op = syntax.InstRune
case syntax.InstRune1:
@@ -411,9 +411,9 @@ func makeOnePass(p *onePassProg) *onePassProg {
runes = append(runes, inst.Rune[0], inst.Rune[0])
}
onePassRunes[pc] = runes
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
+ inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
+ for i := range inst.Next {
+ inst.Next[i] = inst.Out
}
inst.Op = syntax.InstRune
case syntax.InstRuneAny:
@@ -431,9 +431,9 @@ func makeOnePass(p *onePassProg) *onePassProg {
}
instQueue.insert(inst.Out)
onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
+ inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
+ for i := range inst.Next {
+ inst.Next[i] = inst.Out
}
}
return
@@ -441,7 +441,7 @@ func makeOnePass(p *onePassProg) *onePassProg {
instQueue.clear()
instQueue.insert(uint32(p.Start))
- m := make(map[uint32]bool, len(p.Inst))
+ m := make([]bool, len(p.Inst))
for !instQueue.empty() {
visitQueue.clear()
pc := instQueue.next()
diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go
index f4e336c43ba..b1caa445150 100644
--- a/libgo/go/regexp/onepass_test.go
+++ b/libgo/go/regexp/onepass_test.go
@@ -7,6 +7,7 @@ package regexp
import (
"reflect"
"regexp/syntax"
+ "strings"
"testing"
)
@@ -173,6 +174,7 @@ var onePassTests = []struct {
{`^.bc(d|e)*$`, onePass},
{`^(?:(?:aa)|.)$`, notOnePass},
{`^(?:(?:a{1,2}){1,2})$`, notOnePass},
+ {`^l` + strings.Repeat("o", 2<<8) + `ng$`, onePass},
}
func TestCompileOnePass(t *testing.T) {
@@ -223,3 +225,23 @@ func TestRunOnePass(t *testing.T) {
}
}
}
+
+func BenchmarkCompileOnepass(b *testing.B) {
+ for _, test := range onePassTests {
+ if test.onePass == notOnePass {
+ continue
+ }
+ name := test.re
+ if len(name) > 20 {
+ name = name[:20] + "..."
+ }
+ b.Run(name, func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ if _, err := Compile(test.re); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+ }
+}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 01093d4bd0d..b1af23e8504 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -76,7 +76,8 @@ import (
)
// Regexp is the representation of a compiled regular expression.
-// A Regexp is safe for concurrent use by multiple goroutines.
+// A Regexp is safe for concurrent use by multiple goroutines,
+// except for configuration methods, such as Longest.
type Regexp struct {
// read-only after Compile
regexpRO
@@ -159,6 +160,8 @@ func CompilePOSIX(expr string) (*Regexp, error) {
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
+// This method modifies the Regexp and may not be called concurrently
+// with any other methods.
func (re *Regexp) Longest() {
re.longest = true
}
@@ -313,11 +316,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
func (i *inputString) context(pos int) syntax.EmptyOp {
r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+ // 0 < pos && pos <= len(i.str)
+ if uint(pos-1) < uint(len(i.str)) {
+ r1 = rune(i.str[pos-1])
+ if r1 >= utf8.RuneSelf {
+ r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+ }
}
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+ // 0 <= pos && pos < len(i.str)
+ if uint(pos) < uint(len(i.str)) {
+ r2 = rune(i.str[pos])
+ if r2 >= utf8.RuneSelf {
+ r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+ }
}
return syntax.EmptyOpContext(r1, r2)
}
@@ -352,11 +363,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
func (i *inputBytes) context(pos int) syntax.EmptyOp {
r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRune(i.str[:pos])
+ // 0 < pos && pos <= len(i.str)
+ if uint(pos-1) < uint(len(i.str)) {
+ r1 = rune(i.str[pos-1])
+ if r1 >= utf8.RuneSelf {
+ r1, _ = utf8.DecodeLastRune(i.str[:pos])
+ }
}
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRune(i.str[pos:])
+ // 0 <= pos && pos < len(i.str)
+ if uint(pos) < uint(len(i.str)) {
+ r2 = rune(i.str[pos])
+ if r2 >= utf8.RuneSelf {
+ r2, _ = utf8.DecodeRune(i.str[pos:])
+ }
}
return syntax.EmptyOpContext(r1, r2)
}
@@ -590,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
})
}
-var specialBytes = []byte(`\.+*?()|[]{}^$`)
+// Bitmap used by func special to check whether a character needs to be escaped.
+var specialBytes [16]byte
+// special reports whether byte b needs to be escaped by QuoteMeta.
func special(b byte) bool {
- return bytes.IndexByte(specialBytes, b) >= 0
+ return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
+}
+
+func init() {
+ for _, b := range []byte(`\.+*?()|[]{}^$`) {
+ specialBytes[b%16] |= 1 << (b / 16)
+ }
}
// QuoteMeta returns a string that quotes all regular expression metacharacters
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index 7b8be55ddb1..8c6d43a7063 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -381,7 +381,7 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
}
}
if op == OpAlternate {
- re.Sub = p.factor(re.Sub, re.Flags)
+ re.Sub = p.factor(re.Sub)
if len(re.Sub) == 1 {
old := re
re = re.Sub[0]
@@ -402,7 +402,7 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
// which simplifies by character class introduction to
// A(B[CD]|EF)|BC[XY]
//
-func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
+func (p *parser) factor(sub []*Regexp) []*Regexp {
if len(sub) < 2 {
return sub
}