diff options
Diffstat (limited to 'libgo/go/regexp/syntax/parse.go')
-rw-r--r-- | libgo/go/regexp/syntax/parse.go | 24 |
1 files changed, 19 insertions, 5 deletions
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index d579a4069b1..f38bbf66e3c 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -470,9 +470,14 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
 	}
 	sub = out
 
-	// Round 2: Factor out common complex prefixes,
-	// just the first piece of each concatenation,
-	// whatever it is. This is good enough a lot of the time.
+	// Round 2: Factor out common simple prefixes,
+	// just the first piece of each concatenation.
+	// This will be good enough a lot of the time.
+	//
+	// Complex subexpressions (e.g. involving quantifiers)
+	// are not safe to factor because that collapses their
+	// distinct paths through the automaton, which affects
+	// correctness in some cases.
 	start = 0
 	out = sub[:0]
 	var first *Regexp
@@ -485,7 +490,9 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
 		var ifirst *Regexp
 		if i < len(sub) {
 			ifirst = p.leadingRegexp(sub[i])
-			if first != nil && first.Equal(ifirst) {
+			if first != nil && first.Equal(ifirst) &&
+				// first must be a character class OR a fixed repeat of a character class.
+				(isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
 				continue
 			}
 		}
@@ -830,7 +837,14 @@ func Parse(s string, flags Flags) (*Regexp, error) {
 						lit = t[2:i]
 						t = t[i+2:]
 					}
-					p.push(literalRegexp(lit, p.flags))
+					for lit != "" {
+						c, rest, err := nextRune(lit)
+						if err != nil {
+							return nil, err
+						}
+						p.literal(c)
+						lit = rest
+					}
 					break BigSwitch
 				case 'z':
 					p.op(OpEndText)