summary | refs | log | tree | commit | diff
path: root/libgo/go/regexp/syntax/parse.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp/syntax/parse.go')
-rw-r--r-- libgo/go/regexp/syntax/parse.go | 24
1 files changed, 19 insertions, 5 deletions
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index d579a4069b1..f38bbf66e3c 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -470,9 +470,14 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
}
sub = out
- // Round 2: Factor out common complex prefixes,
- // just the first piece of each concatenation,
- // whatever it is. This is good enough a lot of the time.
+ // Round 2: Factor out common simple prefixes,
+ // just the first piece of each concatenation.
+ // This will be good enough a lot of the time.
+ //
+ // Complex subexpressions (e.g. involving quantifiers)
+ // are not safe to factor because that collapses their
+ // distinct paths through the automaton, which affects
+ // correctness in some cases.
start = 0
out = sub[:0]
var first *Regexp
@@ -485,7 +490,9 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
var ifirst *Regexp
if i < len(sub) {
ifirst = p.leadingRegexp(sub[i])
- if first != nil && first.Equal(ifirst) {
+ if first != nil && first.Equal(ifirst) &&
+ // first must be a character class OR a fixed repeat of a character class.
+ (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
continue
}
}
@@ -830,7 +837,14 @@ func Parse(s string, flags Flags) (*Regexp, error) {
lit = t[2:i]
t = t[i+2:]
}
- p.push(literalRegexp(lit, p.flags))
+ for lit != "" {
+ c, rest, err := nextRune(lit)
+ if err != nil {
+ return nil, err
+ }
+ p.literal(c)
+ lit = rest
+ }
break BigSwitch
case 'z':
p.op(OpEndText)