diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2012-12-22 14:27:13 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2012-12-22 14:27:13 +0000 |
commit | c949e6f8ed33f31bd46c1626dc6c5828a1750e56 (patch) | |
tree | e519be160770e6b20bfe88eb923ea6aa8edb3e58 /sed/regexp.c | |
download | sed-tarball-master.tar.gz |
Diffstat (limited to 'sed/regexp.c')
-rw-r--r-- | sed/regexp.c | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/sed/regexp.c b/sed/regexp.c new file mode 100644 index 0000000..bbeccb5 --- /dev/null +++ b/sed/regexp.c @@ -0,0 +1,267 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 1999, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include "sed.h" + +#include <ctype.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef gettext_noop +# define N_(String) gettext_noop(String) +#else +# define N_(String) (String) +#endif + +extern bool use_extended_syntax_p; + +static const char errors[] = + "no previous regular expression\0" + "cannot specify modifiers on empty regexp"; + +#define NO_REGEX (errors) +#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression"))) +#define END_ERRORS (BAD_MODIF + sizeof(N_("cannot specify modifiers on empty regexp"))) + + + +static void +compile_regex_1 (new_regex, needed_sub) + struct regex *new_regex; + int needed_sub; +{ +#ifdef REG_PERL + int errcode; + errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz, + (needed_sub ? 0 : REG_NOSUB) + | new_regex->flags + | extended_regexp_flags); + + if (errcode) + { + char errorbuf[200]; + regerror(errcode, NULL, errorbuf, 200); + bad_prog(gettext(errorbuf)); + } +#else + const char *error; + int syntax = ((extended_regexp_flags & REG_EXTENDED) + ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + syntax &= ~RE_DOT_NOT_NULL; + syntax |= RE_NO_POSIX_BACKTRACKING; + + switch (posixicity) + { + case POSIXLY_EXTENDED: + syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD; + break; + case POSIXLY_CORRECT: + syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD; + break; + case POSIXLY_BASIC: + syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS; + break; + } + +#ifdef RE_ICASE + syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0; +#endif +#ifdef RE_NO_SUB + syntax |= needed_sub ? 0 : RE_NO_SUB; +#endif + + new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (new_regex->flags & REG_NEWLINE) + { + /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + } + + re_set_syntax (syntax); + error = re_compile_pattern (new_regex->re, new_regex->sz, + &new_regex->pattern); + new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0; + + new_regex->pattern.translate = NULL; +#ifndef RE_ICASE + if (new_regex->flags & REG_ICASE) + { + static char translate[1 << (sizeof(char) * 8)]; + int i; + for (i = 0; i < sizeof(translate) / sizeof(char); i++) + translate[i] = tolower (i); + + new_regex->pattern.translate = translate; + } +#endif + + if (error) + bad_prog(error); +#endif + + /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ + if (needed_sub + && new_regex->pattern.re_nsub < needed_sub - 1 + && posixicity == POSIXLY_EXTENDED) + { + char buf[200]; + sprintf(buf, _("invalid reference \\%d on `s' command's RHS"), + needed_sub - 1); + bad_prog(buf); + } +} + +struct regex * +compile_regex(b, flags, needed_sub) + struct buffer *b; + int flags; + int needed_sub; +{ + struct regex *new_regex; + size_t re_len; + + /* // matches the last RE */ + if (size_buffer(b) == 0) + { + if (flags > 0) + bad_prog(_(BAD_MODIF)); + return NULL; + } + + re_len = size_buffer(b); + new_regex = ck_malloc(sizeof (struct regex) + re_len - 1); + new_regex->flags = flags; + memcpy (new_regex->re, get_buffer(b), re_len); + +#ifdef REG_PERL + new_regex->sz = re_len; +#else + /* GNU regex does not process \t & co. */ + new_regex->sz = normalize_text(new_regex->re, re_len, TEXT_REGEX); +#endif + + compile_regex_1 (new_regex, needed_sub); + return new_regex; +} + +#ifdef REG_PERL +static void +copy_regs (regs, pmatch, nregs) + struct re_registers *regs; + regmatch_t *pmatch; + int nregs; +{ + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (!regs->start) + { /* No. So allocate them with malloc. */ + regs->start = MALLOC (need_regs, regoff_t); + regs->end = MALLOC (need_regs, regoff_t); + regs->num_regs = need_regs; + } + else if (need_regs > regs->num_regs) + { /* Yes. We also need more elements than were already + allocated, so reallocate them. */ + regs->start = REALLOC (regs->start, need_regs, regoff_t); + regs->end = REALLOC (regs->end, need_regs, regoff_t); + regs->num_regs = need_regs; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; +} +#endif + +int +match_regex(regex, buf, buflen, buf_start_offset, regarray, regsize) + struct regex *regex; + char *buf; + size_t buflen; + size_t buf_start_offset; + struct re_registers *regarray; + int regsize; +{ + int ret; + static struct regex *regex_last; +#ifdef REG_PERL + regmatch_t rm[10], *regmatch = rm; + if (regsize > 10) + regmatch = (regmatch_t *) alloca (sizeof (regmatch_t) * regsize); +#endif + + /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */ + + /* Keep track of the last regexp matched. */ + if (!regex) + { + regex = regex_last; + if (!regex_last) + bad_prog(_(NO_REGEX)); + } + else + regex_last = regex; + +#ifdef REG_PERL + regmatch[0].rm_so = (int)buf_start_offset; + regmatch[0].rm_eo = (int)buflen; + ret = regexec (®ex->pattern, buf, regsize, regmatch, REG_STARTEND); + + if (regsize) + copy_regs (regarray, regmatch, regsize); + + return (ret == 0); +#else + if (regex->pattern.no_sub && regsize) + compile_regex_1 (regex, regsize); + + regex->pattern.regs_allocated = REGS_REALLOCATE; + + ret = re_search (®ex->pattern, buf, buflen, buf_start_offset, + buflen - buf_start_offset, + regsize ? regarray : NULL); + + return (ret > -1); +#endif +} + + +#ifdef DEBUG_LEAKS +void +release_regex(regex) + struct regex *regex; +{ + regfree(®ex->pattern); + free(regex); +} +#endif /*DEBUG_LEAKS*/ |