summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2019-09-06 18:12:46 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2019-09-06 18:12:46 +0000
commitaa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch)
tree7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/regexp
parent920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff)
libgo: update to Go 1.13beta1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497 From-SVN: r275473
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go47
-rw-r--r--libgo/go/regexp/exec.go4
-rw-r--r--libgo/go/regexp/exec_test.go1
-rw-r--r--libgo/go/regexp/find_test.go20
-rw-r--r--libgo/go/regexp/onepass_test.go10
-rw-r--r--libgo/go/regexp/regexp.go53
-rw-r--r--libgo/go/regexp/syntax/parse_test.go1
-rw-r--r--libgo/go/regexp/syntax/regexp.go2
8 files changed, 116 insertions, 22 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 623f82df72d..626a69142f5 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -860,6 +860,25 @@ func BenchmarkQuoteMetaNone(b *testing.B) {
}
}
+var compileBenchData = []struct{ name, re string }{
+ {"Onepass", `^a.[l-nA-Cg-j]?e$`},
+ {"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`},
+ {"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)},
+}
+
+func BenchmarkCompile(b *testing.B) {
+ for _, data := range compileBenchData {
+ b.Run(data.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ if _, err := Compile(data.re); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+ }
+}
+
func TestDeepEqual(t *testing.T) {
re1 := MustCompile("a.*b.*c.*d")
re2 := MustCompile("a.*b.*c.*d")
@@ -882,3 +901,31 @@ func TestDeepEqual(t *testing.T) {
t.Errorf("DeepEqual(re1, re2) = false, want true")
}
}
+
+var minInputLenTests = []struct {
+ Regexp string
+ min int
+}{
+ {``, 0},
+ {`a`, 1},
+ {`aa`, 2},
+ {`(aa)a`, 3},
+ {`(?:aa)a`, 3},
+ {`a?a`, 1},
+ {`(aaa)|(aa)`, 2},
+ {`(aa)+a`, 3},
+ {`(aa)*a`, 1},
+ {`(aa){3,5}`, 6},
+ {`[a-z]`, 1},
+ {`日`, 3},
+}
+
+func TestMinInputLen(t *testing.T) {
+ for _, tt := range minInputLenTests {
+ re, _ := syntax.Parse(tt.Regexp, syntax.Perl)
+ m := minInputLen(re)
+ if m != tt.min {
+ t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min)
+ }
+ }
+}
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go
index efe764e2dca..4411e4c3e61 100644
--- a/libgo/go/regexp/exec.go
+++ b/libgo/go/regexp/exec.go
@@ -524,6 +524,10 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
dstCap = arrayNoInts[:0:0]
}
+ if r == nil && len(b)+len(s) < re.minInputLen {
+ return nil
+ }
+
if re.onepass != nil {
return re.doOnePass(r, b, s, pos, ncap, dstCap)
}
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 14892193289..1e8795525d0 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -717,6 +717,7 @@ var benchSizes = []struct {
name string
n int
}{
+ {"16", 16},
{"32", 32},
{"1K", 1 << 10},
{"32K", 32 << 10},
diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go
index e07eb7d5c05..87c49b074fa 100644
--- a/libgo/go/regexp/find_test.go
+++ b/libgo/go/regexp/find_test.go
@@ -161,6 +161,9 @@ func TestFind(t *testing.T) {
t.Errorf("expected match; got none: %s", test)
case test.matches != nil && result != nil:
expect := test.text[test.matches[0][0]:test.matches[0][1]]
+ if len(result) != cap(result) {
+ t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
+ }
if expect != string(result) {
t.Errorf("expected %q got %q: %s", expect, result, test)
}
@@ -242,9 +245,13 @@ func TestFindAll(t *testing.T) {
continue
}
for k, e := range test.matches {
+ got := result[k]
+ if len(got) != cap(got) {
+ t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
+ }
expect := test.text[e[0]:e[1]]
- if expect != string(result[k]) {
- t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
+ if expect != string(got) {
+ t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
}
}
}
@@ -323,9 +330,14 @@ func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte,
}
continue
}
+ got := result[k/2]
+ if len(got) != cap(got) {
+ t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
+ return
+ }
expect := test.text[submatches[k]:submatches[k+1]]
- if expect != string(result[k/2]) {
- t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
+ if expect != string(got) {
+ t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
return
}
}
diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go
index a0f2e390489..32264d5f1ef 100644
--- a/libgo/go/regexp/onepass_test.go
+++ b/libgo/go/regexp/onepass_test.go
@@ -223,13 +223,3 @@ func TestRunOnePass(t *testing.T) {
}
}
}
-
-func BenchmarkCompileOnepass(b *testing.B) {
- b.ReportAllocs()
- const re = `^a.[l-nA-Cg-j]?e$`
- for i := 0; i < b.N; i++ {
- if _, err := Compile(re); err != nil {
- b.Fatal(err)
- }
- }
-}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 38b3c86d9f6..19ca6f2223f 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -46,9 +46,10 @@
// If 'Index' is present, matches and submatches are identified by byte index
// pairs within the input string: result[2*n:2*n+1] identifies the indexes of
// the nth submatch. The pair for n==0 identifies the match of the entire
-// expression. If 'Index' is not present, the match is identified by the
-// text of the match/submatch. If an index is negative, it means that
-// subexpression did not match any string in the input.
+// expression. If 'Index' is not present, the match is identified by the text
+// of the match/submatch. If an index is negative or text is nil, it means that
+// subexpression did not match any string in the input. For 'String' versions
+// an empty string means either no match or an empty match.
//
// There is also a subset of the methods that can be applied to text read
// from a RuneReader:
@@ -93,6 +94,7 @@ type Regexp struct {
matchcap int // size of recorded match lengths
prefixComplete bool // prefix is the entire regexp
cond syntax.EmptyOp // empty-width conditions required at start of match
+ minInputLen int // minimum length of the input in bytes
// This field can be modified by the Longest method,
// but it is otherwise read-only.
@@ -190,6 +192,7 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
cond: prog.StartCond(),
longest: longest,
matchcap: matchcap,
+ minInputLen: minInputLen(re),
}
if regexp.onepass == nil {
regexp.prefix, regexp.prefixComplete = prog.Prefix()
@@ -263,6 +266,42 @@ func (re *Regexp) put(m *machine) {
matchPool[re.mpool].Put(m)
}
+// minInputLen walks the regexp to find the minimum length of any matchable input
+func minInputLen(re *syntax.Regexp) int {
+ switch re.Op {
+ default:
+ return 0
+ case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass:
+ return 1
+ case syntax.OpLiteral:
+ l := 0
+ for _, r := range re.Rune {
+ l += utf8.RuneLen(r)
+ }
+ return l
+ case syntax.OpCapture, syntax.OpPlus:
+ return minInputLen(re.Sub[0])
+ case syntax.OpRepeat:
+ return re.Min * minInputLen(re.Sub[0])
+ case syntax.OpConcat:
+ l := 0
+ for _, sub := range re.Sub {
+ l += minInputLen(sub)
+ }
+ return l
+ case syntax.OpAlternate:
+ l := minInputLen(re.Sub[0])
+ var lnext int
+ for _, sub := range re.Sub[1:] {
+ lnext = minInputLen(sub)
+ if lnext < l {
+ l = lnext
+ }
+ }
+ return l
+ }
+}
+
// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
@@ -761,7 +800,7 @@ func (re *Regexp) Find(b []byte) []byte {
if a == nil {
return nil
}
- return b[a[0]:a[1]]
+ return b[a[0]:a[1]:a[1]]
}
// FindIndex returns a two-element slice of integers defining the location of
@@ -829,7 +868,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
ret := make([][]byte, 1+re.numSubexp)
for i := range ret {
if 2*i < len(a) && a[2*i] >= 0 {
- ret[i] = b[a[2*i]:a[2*i+1]]
+ ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]]
}
}
return ret
@@ -1025,7 +1064,7 @@ func (re *Regexp) FindAll(b []byte, n int) [][]byte {
if result == nil {
result = make([][]byte, 0, startSize)
}
- result = append(result, b[match[0]:match[1]])
+ result = append(result, b[match[0]:match[1]:match[1]])
})
return result
}
@@ -1100,7 +1139,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
slice := make([][]byte, len(match)/2)
for j := range slice {
if match[2*j] >= 0 {
- slice[j] = b[match[2*j]:match[2*j+1]]
+ slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]]
}
}
result = append(result, slice)
diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go
index fe3d2517615..5581ba1ca5e 100644
--- a/libgo/go/regexp/syntax/parse_test.go
+++ b/libgo/go/regexp/syntax/parse_test.go
@@ -185,6 +185,7 @@ var parseTests = []parseTest{
{`(?-s).`, `dnl{}`},
{`(?:(?:^).)`, `cat{bol{}dot{}}`},
{`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
+ {`[\s\S]a`, `cat{cc{0x0-0x10ffff}lit{a}}`},
// RE2 prefix_tests
{`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
index ae5fa053f98..3a4d2d201cd 100644
--- a/libgo/go/regexp/syntax/regexp.go
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -139,7 +139,7 @@ func writeRegexp(b *strings.Builder, re *Regexp) {
b.WriteRune('[')
if len(re.Rune) == 0 {
b.WriteString(`^\x00-\x{10FFFF}`)
- } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
+ } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 {
// Contains 0 and MaxRune. Probably a negated class.
// Print the gaps.
b.WriteRune('^')