summaryrefslogtreecommitdiff
path: root/libgo/go/regexp/regexp.go
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-05-02 14:43:35 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-05-02 14:43:35 +0000
commit34efdaf078b01a7387007c4e6bde6db86384c4b7 (patch)
treed503eaf41d085669d1481bb46ec038bc866fece6 /libgo/go/regexp/regexp.go
parentf733cf303bcdc952c92b81dd62199a40a1f555ec (diff)
downloadgcc-tarball-master.tar.gz
gcc-7.1.0gcc-7.1.0
Diffstat (limited to 'libgo/go/regexp/regexp.go')
-rw-r--r--libgo/go/regexp/regexp.go159
1 files changed, 90 insertions, 69 deletions
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index d7d0edb993..01093d4bd0 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -22,14 +22,14 @@
// All characters are UTF-8-encoded code points.
//
// There are 16 methods of Regexp that match a regular expression and identify
-// the matched text. Their names are matched by this regular expression:
+// the matched text. Their names are matched by this regular expression:
//
// Find(All)?(String)?(Submatch)?(Index)?
//
// If 'All' is present, the routine matches successive non-overlapping
-// matches of the entire expression. Empty matches abutting a preceding
-// match are ignored. The return value is a slice containing the successive
-// return values of the corresponding non-'All' routine. These routines take
+// matches of the entire expression. Empty matches abutting a preceding
+// match are ignored. The return value is a slice containing the successive
+// return values of the corresponding non-'All' routine. These routines take
// an extra integer argument, n; if n >= 0, the function returns at most n
// matches/submatches.
//
@@ -45,9 +45,9 @@
//
// If 'Index' is present, matches and submatches are identified by byte index
// pairs within the input string: result[2*n:2*n+1] identifies the indexes of
-// the nth submatch. The pair for n==0 identifies the match of the entire
-// expression. If 'Index' is not present, the match is identified by the
-// text of the match/submatch. If an index is negative, it means that
+// the nth submatch. The pair for n==0 identifies the match of the entire
+// expression. If 'Index' is not present, the match is identified by the
+// text of the match/submatch. If an index is negative, it means that
// subexpression did not match any string in the input.
//
// There is also a subset of the methods that can be applied to text read
@@ -55,7 +55,7 @@
//
// MatchReader, FindReaderIndex, FindReaderSubmatchIndex
//
-// This set may grow. Note that regular expression matches may need to
+// This set may grow. Note that regular expression matches may need to
// examine text beyond the text returned by a match, so the methods that
// match text from a RuneReader may read arbitrarily far into the input
// before returning.
@@ -75,12 +75,18 @@ import (
"unicode/utf8"
)
-var debug = false
-
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
// read-only after Compile
+ regexpRO
+
+ // cache of machines for running regexp
+ mu sync.Mutex
+ machine []*machine
+}
+
+type regexpRO struct {
expr string // as passed to Compile
prog *syntax.Prog // compiled program
onepass *onePassProg // onepass program or nil
@@ -93,10 +99,6 @@ type Regexp struct {
numSubexp int
subexpNames []string
longest bool
-
- // cache of machines for running regexp
- mu sync.Mutex
- machine []*machine
}
// String returns the source text used to compile the regular expression.
@@ -109,10 +111,11 @@ func (re *Regexp) String() string {
// When using a Regexp in multiple goroutines, giving each goroutine
// its own copy helps to avoid lock contention.
func (re *Regexp) Copy() *Regexp {
- r := *re
- r.mu = sync.Mutex{}
- r.machine = nil
- return &r
+ // It is not safe to copy Regexp by value
+ // since it contains a sync.Mutex.
+ return &Regexp{
+ regexpRO: re.regexpRO,
+ }
}
// Compile parses a regular expression and returns, if successful,
@@ -174,13 +177,15 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
return nil, err
}
regexp := &Regexp{
- expr: expr,
- prog: prog,
- onepass: compileOnePass(prog),
- numSubexp: maxCap,
- subexpNames: capNames,
- cond: prog.StartCond(),
- longest: longest,
+ regexpRO: regexpRO{
+ expr: expr,
+ prog: prog,
+ onepass: compileOnePass(prog),
+ numSubexp: maxCap,
+ subexpNames: capNames,
+ cond: prog.StartCond(),
+ longest: longest,
+ },
}
if regexp.onepass == notOnePass {
regexp.prefix, regexp.prefixComplete = prog.Prefix()
@@ -258,10 +263,10 @@ func (re *Regexp) NumSubexp() int {
}
// SubexpNames returns the names of the parenthesized subexpressions
-// in this Regexp. The name for the first sub-expression is names[1],
+// in this Regexp. The name for the first sub-expression is names[1],
// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
// Since the Regexp as a whole cannot be named, names[0] is always
-// the empty string. The slice should not be modified.
+// the empty string. The slice should not be modified.
func (re *Regexp) SubexpNames() []string {
return re.subexpNames
}
@@ -394,7 +399,7 @@ func (i *inputReader) context(pos int) syntax.EmptyOp {
}
// LiteralPrefix returns a literal string that must begin any match
-// of the regular expression re. It returns the boolean true if the
+// of the regular expression re. It returns the boolean true if the
// literal string comprises the entire regular expression.
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
return re.prefix, re.prefixComplete
@@ -403,21 +408,21 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
// MatchReader reports whether the Regexp matches the text read by the
// RuneReader.
func (re *Regexp) MatchReader(r io.RuneReader) bool {
- return re.doExecute(r, nil, "", 0, 0) != nil
+ return re.doMatch(r, nil, "")
}
// MatchString reports whether the Regexp matches the string s.
func (re *Regexp) MatchString(s string) bool {
- return re.doExecute(nil, nil, s, 0, 0) != nil
+ return re.doMatch(nil, nil, s)
}
// Match reports whether the Regexp matches the byte slice b.
func (re *Regexp) Match(b []byte) bool {
- return re.doExecute(nil, b, "", 0, 0) != nil
+ return re.doMatch(nil, b, "")
}
// MatchReader checks whether a textual regular expression matches the text
-// read by the RuneReader. More complicated queries need to use Compile and
+// read by the RuneReader. More complicated queries need to use Compile and
// the full Regexp interface.
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
re, err := Compile(pattern)
@@ -428,7 +433,7 @@ func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
}
// MatchString checks whether a textual regular expression
-// matches a string. More complicated queries need
+// matches a string. More complicated queries need
// to use Compile and the full Regexp interface.
func MatchString(pattern string, s string) (matched bool, err error) {
re, err := Compile(pattern)
@@ -439,7 +444,7 @@ func MatchString(pattern string, s string) (matched bool, err error) {
}
// Match checks whether a textual regular expression
-// matches a byte slice. More complicated queries need
+// matches a byte slice. More complicated queries need
// to use Compile and the full Regexp interface.
func Match(pattern string, b []byte) (matched bool, err error) {
re, err := Compile(pattern)
@@ -450,11 +455,11 @@ func Match(pattern string, b []byte) (matched bool, err error) {
}
// ReplaceAllString returns a copy of src, replacing matches of the Regexp
-// with the replacement string repl. Inside repl, $ signs are interpreted as
+// with the replacement string repl. Inside repl, $ signs are interpreted as
// in Expand, so for instance $1 represents the text of the first submatch.
func (re *Regexp) ReplaceAllString(src, repl string) string {
n := 2
- if strings.Index(repl, "$") >= 0 {
+ if strings.Contains(repl, "$") {
n = 2 * (re.numSubexp + 1)
}
b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
@@ -464,7 +469,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
}
// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
-// with the replacement string repl. The replacement repl is substituted directly,
+// with the replacement string repl. The replacement repl is substituted directly,
// without using Expand.
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
@@ -474,7 +479,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
// ReplaceAllStringFunc returns a copy of src in which all matches of the
// Regexp have been replaced by the return value of function repl applied
-// to the matched substring. The replacement returned by repl is substituted
+// to the matched substring. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
@@ -497,8 +502,9 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
nmatch = re.prog.NumCap
}
+ var dstCap [2]int
for searchPos <= endPos {
- a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
+ a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
if len(a) == 0 {
break // no more matches
}
@@ -530,7 +536,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
searchPos += width
} else if searchPos+1 > a[1] {
// This clause is only needed at the end of the input
- // string. In that case, DecodeRuneInString returns width=0.
+ // string. In that case, DecodeRuneInString returns width=0.
searchPos++
} else {
searchPos = a[1]
@@ -548,7 +554,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
}
// ReplaceAll returns a copy of src, replacing matches of the Regexp
-// with the replacement text repl. Inside repl, $ signs are interpreted as
+// with the replacement text repl. Inside repl, $ signs are interpreted as
// in Expand, so for instance $1 represents the text of the first submatch.
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
n := 2
@@ -566,7 +572,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
}
// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
-// with the replacement bytes repl. The replacement repl is substituted directly,
+// with the replacement bytes repl. The replacement repl is substituted directly,
// without using Expand.
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
@@ -576,7 +582,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
// ReplaceAllFunc returns a copy of src in which all matches of the
// Regexp have been replaced by the return value of function repl applied
-// to the matched byte slice. The replacement returned by repl is substituted
+// to the matched byte slice. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
@@ -592,13 +598,24 @@ func special(b byte) bool {
// QuoteMeta returns a string that quotes all regular expression metacharacters
// inside the argument text; the returned string is a regular expression matching
-// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
+// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
func QuoteMeta(s string) string {
- b := make([]byte, 2*len(s))
-
// A byte loop is correct because all metacharacters are ASCII.
- j := 0
- for i := 0; i < len(s); i++ {
+ var i int
+ for i = 0; i < len(s); i++ {
+ if special(s[i]) {
+ break
+ }
+ }
+ // No meta characters found, so return original string.
+ if i >= len(s) {
+ return s
+ }
+
+ b := make([]byte, 2*len(s)-i)
+ copy(b, s[:i])
+ j := i
+ for ; i < len(s); i++ {
if special(s[i]) {
b[j] = '\\'
j++
@@ -606,7 +623,7 @@ func QuoteMeta(s string) string {
b[j] = s[i]
j++
}
- return string(b[0:j])
+ return string(b[:j])
}
// The number of capture values in the program may correspond
@@ -636,7 +653,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
}
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
- matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
+ matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
if len(matches) == 0 {
break
}
@@ -676,7 +693,8 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
- a := re.doExecute(nil, b, "", 0, 2)
+ var dstCap [2]int
+ a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
if a == nil {
return nil
}
@@ -684,11 +702,11 @@ func (re *Regexp) Find(b []byte) []byte {
}
// FindIndex returns a two-element slice of integers defining the location of
-// the leftmost match in b of the regular expression. The match itself is at
+// the leftmost match in b of the regular expression. The match itself is at
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindIndex(b []byte) (loc []int) {
- a := re.doExecute(nil, b, "", 0, 2)
+ a := re.doExecute(nil, b, "", 0, 2, nil)
if a == nil {
return nil
}
@@ -696,12 +714,13 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
}
// FindString returns a string holding the text of the leftmost match in s of the regular
-// expression. If there is no match, the return value is an empty string,
+// expression. If there is no match, the return value is an empty string,
// but it will also be empty if the regular expression successfully matches
-// an empty string. Use FindStringIndex or FindStringSubmatch if it is
+// an empty string. Use FindStringIndex or FindStringSubmatch if it is
// necessary to distinguish these cases.
func (re *Regexp) FindString(s string) string {
- a := re.doExecute(nil, nil, s, 0, 2)
+ var dstCap [2]int
+ a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
if a == nil {
return ""
}
@@ -709,11 +728,11 @@ func (re *Regexp) FindString(s string) string {
}
// FindStringIndex returns a two-element slice of integers defining the
-// location of the leftmost match in s of the regular expression. The match
+// location of the leftmost match in s of the regular expression. The match
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindStringIndex(s string) (loc []int) {
- a := re.doExecute(nil, nil, s, 0, 2)
+ a := re.doExecute(nil, nil, s, 0, 2, nil)
if a == nil {
return nil
}
@@ -722,11 +741,11 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
// FindReaderIndex returns a two-element slice of integers defining the
// location of the leftmost match of the regular expression in text read from
-// the RuneReader. The match text was found in the input stream at
+// the RuneReader. The match text was found in the input stream at
// byte offset loc[0] through loc[1]-1.
// A return value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
- a := re.doExecute(r, nil, "", 0, 2)
+ a := re.doExecute(r, nil, "", 0, 2, nil)
if a == nil {
return nil
}
@@ -739,7 +758,8 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
// comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
- a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
+ var dstCap [4]int
+ a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
if a == nil {
return nil
}
@@ -754,14 +774,14 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// Expand appends template to dst and returns the result; during the
// append, Expand replaces variables in the template with corresponding
-// matches drawn from src. The match slice should have been returned by
+// matches drawn from src. The match slice should have been returned by
// FindSubmatchIndex.
//
// In the template, a variable is denoted by a substring of the form
// $name or ${name}, where name is a non-empty sequence of letters,
-// digits, and underscores. A purely numeric name like $1 refers to
+// digits, and underscores. A purely numeric name like $1 refers to
// the submatch with the corresponding index; other names refer to
-// capturing parentheses named with the (?P<name>...) syntax. A
+// capturing parentheses named with the (?P<name>...) syntax. A
// reference to an out of range or unmatched index or a name that is not
// present in the regular expression is replaced with an empty slice.
//
@@ -886,7 +906,7 @@ func extract(str string) (name string, num int, rest string, ok bool) {
// in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
- return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
+ return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
}
// FindStringSubmatch returns a slice of strings holding the text of the
@@ -895,7 +915,8 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
// package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatch(s string) []string {
- a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
+ var dstCap [4]int
+ a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
if a == nil {
return nil
}
@@ -914,16 +935,16 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
- return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
+ return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
}
// FindReaderSubmatchIndex returns a slice holding the index pairs
// identifying the leftmost match of the regular expression of text read by
// the RuneReader, and the matches, if any, of its subexpressions, as defined
-// by the 'Submatch' and 'Index' descriptions in the package comment. A
+// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
- return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
+ return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
}
const startSize = 10 // The size at which to start a slice in the 'All' routines.