summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/Makefile.am19
-rw-r--r--src/Makefile.in340
-rw-r--r--src/buffer.c308
-rw-r--r--src/buffer.h62
-rw-r--r--src/config.h101
-rw-r--r--src/conns.c187
-rw-r--r--src/conns.h59
-rw-r--r--src/defines.h.in89
-rw-r--r--src/dnscache.c198
-rw-r--r--src/dnscache.h38
-rw-r--r--src/filter.c150
-rw-r--r--src/filter.h25
-rw-r--r--src/gnuregex.c5880
-rw-r--r--src/gnuregex.h542
-rw-r--r--src/log.c90
-rw-r--r--src/log.h24
-rw-r--r--src/regexp.h24
-rw-r--r--src/reqs.c832
-rw-r--r--src/reqs.h24
-rw-r--r--src/sock.c362
-rw-r--r--src/sock.h40
-rw-r--r--src/stamp-h.in1
-rw-r--r--src/tinyproxy.c478
-rw-r--r--src/tinyproxy.h87
-rw-r--r--src/uri.c124
-rw-r--r--src/uri.h32
-rw-r--r--src/utils.c264
-rw-r--r--src/utils.h38
28 files changed, 10418 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..63ffb6b
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,19 @@
+bin_PROGRAMS = tinyproxy
+tinyproxy_SOURCES = config.h \
+ buffer.c buffer.h \
+ conns.c conns.h \
+ dnscache.c dnscache.h \
+ log.c log.h \
+ reqs.c reqs.h \
+ sock.c sock.h \
+ tinyproxy.c tinyproxy.h \
+ uri.c uri.h \
+ utils.c utils.h \
+ regexp.h
+
+tinyproxy_LDADD = @LIBOBJS@
+
+EXTRA_DIST = gnuregex.c gnuregex.h \
+ filter.c filter.h
+
+LDADD = @LIBOBJS
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..0bc6f5c
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,340 @@
+# Makefile.in generated automatically by automake 1.4 from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS)
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+CC = @CC@
+CFLAGS = @CFLAGS@ -I. -DHAVE_CONFIG_H
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+MAKEINFO = @MAKEINFO@
+PACKAGE = @PACKAGE@
+VERSION = @VERSION@
+
+bin_PROGRAMS = tinyproxy
+tinyproxy_SOURCES = config.h buffer.c buffer.h conns.c conns.h dnscache.c dnscache.h log.c log.h reqs.c reqs.h sock.c sock.h tinyproxy.c tinyproxy.h uri.c uri.h utils.c utils.h regexp.h
+
+
+tinyproxy_LDADD = @LIBOBJS@
+
+EXTRA_DIST = gnuregex.c gnuregex.h filter.c filter.h
+
+
+LDADD = @LIBOBJS
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = defines.h
+CONFIG_CLEAN_FILES =
+PROGRAMS = $(bin_PROGRAMS)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I.
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+tinyproxy_OBJECTS = buffer.o conns.o dnscache.o log.o reqs.o sock.o \
+tinyproxy.o uri.o utils.o
+tinyproxy_DEPENDENCIES = @LIBOBJS@
+tinyproxy_LDFLAGS =
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@
+DIST_COMMON = ./stamp-h.in Makefile.am Makefile.in defines.h.in \
+filter.c gnuregex.c
+
+
+DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
+
+TAR = tar
+GZIP_ENV = --best
+DEP_FILES = .deps/buffer.P .deps/conns.P .deps/dnscache.P \
+.deps/filter.P .deps/gnuregex.P .deps/log.P .deps/reqs.P .deps/sock.P \
+.deps/tinyproxy.P .deps/uri.P .deps/utils.P
+SOURCES = $(tinyproxy_SOURCES)
+OBJECTS = $(tinyproxy_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .S .c .o .s
+$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(BUILT_SOURCES)
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+defines.h: stamp-h
+ @if test ! -f $@; then \
+ rm -f stamp-h; \
+ $(MAKE) stamp-h; \
+ else :; fi
+stamp-h: $(srcdir)/defines.h.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES= CONFIG_HEADERS=src/defines.h \
+ $(SHELL) ./config.status
+ @echo timestamp > stamp-h 2> /dev/null
+$(srcdir)/defines.h.in: $(srcdir)/stamp-h.in
+ @if test ! -f $@; then \
+ rm -f $(srcdir)/stamp-h.in; \
+ $(MAKE) $(srcdir)/stamp-h.in; \
+ else :; fi
+$(srcdir)/stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOHEADER)
+ @echo timestamp > $(srcdir)/stamp-h.in 2> /dev/null
+
+mostlyclean-hdr:
+
+clean-hdr:
+
+distclean-hdr:
+ -rm -f defines.h
+
+maintainer-clean-hdr:
+
+mostlyclean-binPROGRAMS:
+
+clean-binPROGRAMS:
+ -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+distclean-binPROGRAMS:
+
+maintainer-clean-binPROGRAMS:
+
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ if test -f $$p; then \
+ echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \
+ $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \
+ else :; fi; \
+ done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ list='$(bin_PROGRAMS)'; for p in $$list; do \
+ rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \
+ done
+
+.s.o:
+ $(COMPILE) -c $<
+
+.S.o:
+ $(COMPILE) -c $<
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+tinyproxy: $(tinyproxy_OBJECTS) $(tinyproxy_DEPENDENCIES)
+ @rm -f tinyproxy
+ $(LINK) $(tinyproxy_LDFLAGS) $(tinyproxy_OBJECTS) $(tinyproxy_LDADD) $(LIBS)
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP)
+ list='$(SOURCES) $(HEADERS)'; \
+ unique=`for i in $$list; do echo $$i; done | \
+ awk ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ here=`pwd` && cd $(srcdir) \
+ && mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: $(HEADERS) $(SOURCES) defines.h.in $(TAGS_DEPENDENCIES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS)'; \
+ unique=`for i in $$list; do echo $$i; done | \
+ awk ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)defines.h.in$$unique$(LISP)$$tags" \
+ || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags defines.h.in $$unique $(LISP) -o $$here/TAGS)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+
+subdir = src
+
+distdir: $(DISTFILES)
+ here=`cd $(top_builddir) && pwd`; \
+ top_distdir=`cd $(top_distdir) && pwd`; \
+ distdir=`cd $(distdir) && pwd`; \
+ cd $(top_srcdir) \
+ && $(AUTOMAKE) --include-deps --build-dir=$$here --srcdir-name=$(top_srcdir) --output-dir=$$top_distdir --gnu doc/Makefile
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pr $$/$$file $(distdir)/$$file; \
+ else \
+ test -f $(distdir)/$$file \
+ || ln $$d/$$file $(distdir)/$$file 2> /dev/null \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+buffer.o: buffer.c defines.h utils.h conns.h tinyproxy.h config.h log.h \
+ buffer.h
+conns.o: conns.c defines.h config.h log.h utils.h conns.h tinyproxy.h \
+ buffer.h dnscache.h
+dnscache.o: dnscache.c defines.h utils.h conns.h tinyproxy.h config.h \
+ dnscache.h
+log.o: log.c defines.h tinyproxy.h config.h log.h
+reqs.o: reqs.c defines.h config.h tinyproxy.h sock.h buffer.h utils.h \
+ conns.h log.h reqs.h filter.h uri.h regexp.h
+sock.o: sock.c defines.h tinyproxy.h config.h sock.h buffer.h log.h \
+ utils.h conns.h dnscache.h
+tinyproxy.o: tinyproxy.c defines.h config.h tinyproxy.h utils.h conns.h \
+ log.h sock.h buffer.h reqs.h filter.h
+uri.o: uri.c defines.h uri.h utils.h conns.h tinyproxy.h config.h log.h \
+ regexp.h
+utils.o: utils.c defines.h config.h tinyproxy.h utils.h conns.h log.h \
+ buffer.h
+
+info-am:
+info: info-am
+dvi-am:
+dvi: dvi-am
+check-am: all-am
+check: check-am
+installcheck-am:
+installcheck: installcheck-am
+all-recursive-am: defines.h
+ $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+install-exec-am: install-binPROGRAMS
+install-exec: install-exec-am
+
+install-data-am:
+install-data: install-data-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-am
+uninstall-am: uninstall-binPROGRAMS
+uninstall: uninstall-am
+all-am: Makefile $(PROGRAMS) defines.h
+all-redirect: all-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+mostlyclean-am: mostlyclean-hdr mostlyclean-binPROGRAMS \
+ mostlyclean-compile mostlyclean-tags mostlyclean-depend \
+ mostlyclean-generic
+
+mostlyclean: mostlyclean-am
+
+clean-am: clean-hdr clean-binPROGRAMS clean-compile clean-tags \
+ clean-depend clean-generic mostlyclean-am
+
+clean: clean-am
+
+distclean-am: distclean-hdr distclean-binPROGRAMS distclean-compile \
+ distclean-tags distclean-depend distclean-generic \
+ clean-am
+
+distclean: distclean-am
+
+maintainer-clean-am: maintainer-clean-hdr maintainer-clean-binPROGRAMS \
+ maintainer-clean-compile maintainer-clean-tags \
+ maintainer-clean-depend maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-am
+
+.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \
+mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \
+maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile tags mostlyclean-tags distclean-tags \
+clean-tags maintainer-clean-tags distdir mostlyclean-depend \
+distclean-depend clean-depend maintainer-clean-depend info-am info \
+dvi-am dvi check check-am installcheck-am installcheck all-recursive-am \
+install-exec-am install-exec install-data-am install-data install-am \
+install uninstall-am uninstall all-redirect all-am all installdirs \
+mostlyclean-generic distclean-generic clean-generic \
+maintainer-clean-generic clean mostlyclean distclean maintainer-clean
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..8e76e26
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,308 @@
+/* $Id: buffer.c,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * The buffer used in each connection is a linked list of lines. As the lines
+ * are read in and written out the buffer expands and contracts. Basically,
+ * by using this method we can increase the buffer size dynamicly. However,
+ * we have a hard limit of 64 KB for the size of the buffer. The buffer can be
+ * thought of as a queue were we act on both the head and tail. The various
+ * functions act on each end (the names are taken from what Perl uses to act on
+ * the ends of an array. :)
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <assert.h>
+
+#include "utils.h"
+#include "log.h"
+#include "tinyproxy.h"
+#include "buffer.h"
+
+/*
+ * Take a string of data and a length and make a new line which can be added
+ * to the buffer
+ */
+static struct bufline_s *makenewline(unsigned char *data, unsigned int length)
+{
+ struct bufline_s *newline;
+
+ assert(data);
+ assert(length > 0);
+
+ if (!(newline = xmalloc(sizeof(struct bufline_s))))
+ return NULL;
+
+ newline->string = data;
+ newline->next = NULL;
+ newline->length = length;
+ newline->pos = 0;
+
+ return newline;
+}
+
+/*
+ * Create a new buffer
+ */
+struct buffer_s *new_buffer(void)
+{
+ struct buffer_s *buffptr;
+
+ if (!(buffptr = xmalloc(sizeof(struct buffer_s))))
+ return NULL;
+
+ buffptr->head = buffptr->tail = NULL;
+ buffptr->size = 0;
+
+ return buffptr;
+}
+
+/*
+ * Delete all the lines in the buffer and the buffer itself
+ */
+void delete_buffer(struct buffer_s *buffptr)
+{
+ struct bufline_s *next;
+
+ assert(buffptr);
+
+ while (buffer_head(buffptr)) {
+ next = buffer_head(buffptr)->next;
+ free_line(buffer_head(buffptr));
+ buffer_head(buffptr) = next;
+ }
+ buffer_head(buffptr) = NULL;
+ buffer_tail(buffptr) = NULL;
+
+ safefree(buffptr);
+}
+
+/*
+ * Free the allocated buffer line
+ */
+void free_line(struct bufline_s *line)
+{
+ if (!line)
+ return;
+
+ if (line->string) {
+ safefree(line->string);
+ }
+
+ safefree(line);
+}
+
+/*
+ * Push a new line on to the end of the buffer
+ */
+struct bufline_s *push_buffer(struct buffer_s *buffptr, unsigned char *data,
+ unsigned int length)
+{
+ struct bufline_s *newline;
+
+ assert(buffptr);
+ assert(data);
+ assert(length > 0);
+
+ if (!(newline = makenewline(data, length)))
+ return NULL;
+
+ if (!buffer_head(buffptr) && !buffer_tail(buffptr))
+ buffer_head(buffptr) = buffer_tail(buffptr) = newline;
+ else
+ buffer_tail(buffptr) = (buffer_tail(buffptr)->next = newline);
+
+ buffptr->size += length;
+
+ return newline;
+}
+
+/*
+ * Pop a buffer line off the end of the buffer
+ */
+struct bufline_s *pop_buffer(struct buffer_s *buffptr)
+{
+ struct bufline_s *line, *newend;
+
+ assert(buffptr);
+
+ if (buffer_head(buffptr) == buffer_tail(buffptr)) {
+ line = buffer_head(buffptr);
+ buffer_head(buffptr) = buffer_tail(buffptr) = NULL;
+ buffptr->size = 0;
+ return line;
+ }
+
+ line = buffer_tail(buffptr);
+ newend = buffer_head(buffptr);
+
+ while (newend->next != line && newend->next)
+ newend = newend->next;
+
+ buffer_tail(buffptr) = newend;
+ buffptr->size -= line->length;
+
+ return line;
+}
+
+/*
+ * Unshift a buffer line from the top of the buffer (meaning add a new line
+ * to the top of the buffer)
+ */
+struct bufline_s *unshift_buffer(struct buffer_s *buffptr, unsigned char *data,
+ unsigned int length)
+{
+ struct bufline_s *newline;
+
+ assert(buffptr);
+ assert(data);
+ assert(length > 0);
+
+ if (!(newline = makenewline(data, length)))
+ return NULL;
+
+ if (!buffer_head(buffptr) && buffer_tail(buffptr)) {
+ buffer_head(buffptr) = buffer_tail(buffptr) = newline;
+ } else {
+ newline->next = buffer_head(buffptr);
+ buffer_head(buffptr) = newline;
+ if (!buffer_tail(buffptr))
+ buffer_tail(buffptr) = newline;
+ }
+
+ buffptr->size += length;
+
+ return newline;
+}
+
+/*
+ * Shift a line off the top of the buffer (remove the line from the top of
+ * the buffer)
+ */
+struct bufline_s *shift_buffer(struct buffer_s *buffptr)
+{
+ struct bufline_s *line;
+
+ assert(buffptr);
+
+ if (!buffer_head(buffptr) && !buffer_tail(buffptr)) {
+ line = buffer_head(buffptr);
+ buffer_head(buffptr) = buffer_tail(buffptr) = NULL;
+ buffptr->size = 0;
+ return line;
+ }
+
+ line = buffer_head(buffptr);
+ buffer_head(buffptr) = line->next;
+
+ if (!buffer_head(buffptr))
+ buffer_tail(buffptr) = NULL;
+
+ buffptr->size -= line->length;
+
+ return line;
+}
+
+/*
+ * Reads the bytes from the socket, and adds them to the buffer.
+ * Takes a connection and returns the number of bytes read.
+ */
+int readbuff(int fd, struct buffer_s *buffptr)
+{
+ int bytesin;
+ unsigned char inbuf[BUFFER];
+ unsigned char *buffer;
+
+ assert(fd >= 0);
+ assert(buffptr);
+
+ bytesin = recv(fd, inbuf, BUFFER, 0);
+
+ if (bytesin > 0) {
+ if (!(buffer = xmalloc(bytesin)))
+ return 0;
+
+ memcpy(buffer, inbuf, bytesin);
+ push_buffer(buffptr, buffer, bytesin);
+ return bytesin;
+ } else if (bytesin == 0) {
+ /* connection was closed by client */
+ return -1;
+ } else {
+ switch (errno) {
+#ifdef EWOULDBLOCK
+ case EWOULDBLOCK:
+#else
+# ifdef EAGAIN
+ case EAGAIN:
+# endif
+#endif
+ case EINTR:
+ return 0;
+ case ECONNRESET:
+ return -1;
+ default:
+ log("ERROR readbuff: recv (%s)", strerror(errno));
+ return -1;
+ }
+ }
+}
+
+/*
+ * Write the bytes in the buffer to the socket.
+ * Takes a connection and returns the number of bytes written.
+ */
+int writebuff(int fd, struct buffer_s *buffptr)
+{
+ int bytessent;
+ struct bufline_s *line = buffer_head(buffptr);
+
+ assert(fd >= 0);
+ assert(buffptr);
+
+ bytessent = send(fd, line->string + line->pos,
+ (size_t) (line->length - line->pos), 0);
+
+ if (bytessent >= 0) {
+ /* bytes sent, adjust buffer */
+ line->pos += bytessent;
+ if (line->pos == line->length)
+ free_line(shift_buffer(buffptr));
+ return bytessent;
+ } else {
+ switch (errno) {
+#ifdef EWOULDBLOCK
+ case EWOULDBLOCK:
+#else
+# ifdef EAGAIN
+ case EAGAIN:
+# endif
+#endif
+ case ENOBUFS:
+ case EINTR:
+ case ENOMEM:
+ return 0;
+ default:
+ log("ERROR writebuff: send (%s)", strerror(errno));
+ return -1;
+ }
+ }
+}
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 0000000..a1ee86c
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,62 @@
+/* $Id: buffer.h,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * See 'buffer.c' for a detailed description.
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __BUFFER_H_
+#define __BUFFER_H_ 1
+
+#define MAXBUFFSIZE 20480
+
+struct bufline_s {
+ unsigned char *string; /* the actual string of data */
+ struct bufline_s *next; /* pointer to next in linked list */
+ unsigned int length; /* length of the string of data */
+ unsigned int pos; /* start sending from this offset */
+};
+
+struct buffer_s {
+ struct bufline_s *head; /* top of the buffer */
+ struct bufline_s *tail; /* bottom of the buffer */
+ unsigned long int size; /* total size of the buffer */
+};
+
+#define buffer_head(x) (((struct buffer_s *)(x))->head)
+#define buffer_tail(x) (((struct buffer_s *)(x))->tail)
+#define buffer_size(x) (((struct buffer_s *)(x))->size)
+
+/* Create and delete buffers */
+extern struct buffer_s *new_buffer(void);
+extern void delete_buffer(struct buffer_s *buffptr);
+
+/* Operate on the end of the list */
+extern struct bufline_s *push_buffer(struct buffer_s *buffptr,
+ unsigned char *data, unsigned int length);
+extern struct bufline_s *pop_buffer(struct buffer_s *buffptr);
+
+/* Operate on the head of the list */
+extern struct bufline_s *unshift_buffer(struct buffer_s *buffptr,
+ unsigned char *data,
+ unsigned int length);
+extern struct bufline_s *shift_buffer(struct buffer_s *buffptr);
+
+/* Free a buffer line */
+extern void free_line(struct bufline_s *line);
+
+/* Read/Write the buffer to a socket */
+extern int readbuff(int fd, struct buffer_s *buffptr);
+extern int writebuff(int fd, struct buffer_s *buffptr);
+
+#endif /* __BUFFER_H_ */
diff --git a/src/config.h b/src/config.h
new file mode 100644
index 0000000..f5f7dda
--- /dev/null
+++ b/src/config.h
@@ -0,0 +1,101 @@
+/* $Id: config.h,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * Contains all the tune-able variables which are used by tinyproxy.
+ * Modifications made to these variables WILL change the default behaviour
+ * of tinyproxy. Please read the comments to better understand what each
+ * variable does.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _CONFIG_H_
+#define _CONFIG_H_ 1
+
+/* Change these if you want */
+
+/* Default log file */
+#ifndef DEFAULT_LOG
+#define DEFAULT_LOG "/usr/local/var/log/tinyproxy.log"
+#endif
+
+/* Default port tinyproxy listens on */
+#ifndef DEFAULT_PORT
+#define DEFAULT_PORT 8080
+#endif
+
+/* Default user to change to after */
+#ifndef DEFAULT_USER
+#define DEFAULT_USER "nobody"
+#endif
+
+/*
+ * Define this if you want to have all requests sent to a particular machine.
+ * This is useful if you have another proxy further upstream which you must
+ * send your HTTP requests through.
+ */
+#undef UPSTREAM
+
+#ifdef UPSTREAM
+#define UPSTREAM_PROXY_NAME "name.of.upstream.machine"
+#define UPSTREAM_PROXY_PORT 80
+#endif
+
+/*
+ * Define this if you want tinyproxy to use /proc/loadavg to determine
+ * system load (Linux only, I think)
+ */
+#define USE_PROC
+
+#ifndef USE_PROC
+/*
+ * Path to uptime to determin system load. This path doesn't have to be
+ * valid if DEFAULT_CUTOFFLOAD is 0
+ */
+#define UPTIME_PATH "/usr/bin/uptime"
+#endif /* !USE_PROC */
+
+/*
+ * The default load at which tinyproxy will start refusing connections.
+ * 0 == disabled by default
+ */
+#define DEFAULT_CUTOFFLOAD 0
+
+/*
+ * NOTE: for DEFAULT_STATHOST: this controls remote proxy stats display.
+ * for example, the default DEFAULT_STATHOST of "tinyproxy.stats" will
+ * mean that when you use the proxy to access http://tinyproxy.stats/",
+ * you will be shown the proxy stats. Set this to something obscure
+ * if you don't want random people to be able to see them, or set it to
+ * "" to disable. In the future, I figure maybe some sort of auth
+ * might be desirable, but that would involve a major simplicity
+ * sacrifice.
+ */
+
+/* The "hostname" for getting tinyproxy stats. "" = disabled by default */
+#define DEFAULT_STATHOST "tinyproxy.stats"
+
+/*
+ * NOTE: change these if you know what you're doing
+ */
+#define SOCK_TIMEOUT 5
+/* Recalculate load avery 30 seconds */
+#define LOAD_RECALCTIMER 30
+/* Default HTTP port */
+#define HTTP_PORT 80
+/* Every time conncoll is called, nuke connections idle for > 60 seconds */
+#define STALECONN_TIME (60 * 15)
+/* Every 100 times through run the garbage collection */
+#define GARBCOLL_INTERVAL 10
+
+#endif
diff --git a/src/conns.c b/src/conns.c
new file mode 100644
index 0000000..a96067f
--- /dev/null
+++ b/src/conns.c
@@ -0,0 +1,187 @@
+/* $Id: conns.c,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * These functions handle the various stages a connection will go through in
+ * the course of its life in tinyproxy. New connections are initialized and
+ * added to the linked list of active connections. As these connections are
+ * completed, they are closed, and the a garbage collection process removes
+ * them from the list.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sysexits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <adns.h>
+
+extern adns_state adns;
+
+#include "config.h"
+#include "log.h"
+#include "utils.h"
+#include "conns.h"
+#include "buffer.h"
+#include "dnscache.h"
+
+struct conn_s *connections = NULL;
+
+/*
+ * Add a new connection to the linked list
+ */
+struct conn_s *new_conn(int fd)
+{
+ struct conn_s **rpConnptr = &connections;
+ struct conn_s *connptr = connections;
+ struct conn_s *newconn;
+
+ assert(fd >= 0);
+
+ while (connptr) {
+ rpConnptr = &connptr->next;
+ connptr = connptr->next;
+ }
+
+ if (!(newconn = xmalloc(sizeof(struct conn_s)))) {
+ log("ERROR new_conn: could not allocate memory for conn");
+ return NULL;
+ }
+
+ /* Allocate the new buffer */
+ newconn->cbuffer = NULL;
+ newconn->sbuffer = NULL;
+ if (!(newconn->cbuffer = new_buffer())
+ || !(newconn->sbuffer = new_buffer())) {
+ log("ERROR new_conn: could not allocate memory for buffer");
+ safefree(newconn->cbuffer);
+ safefree(newconn->sbuffer);
+
+ newconn->next = NULL;
+ safefree(newconn);
+ return NULL;
+ }
+
+ newconn->client_fd = fd;
+ newconn->server_fd = -1;
+ newconn->type = NEWCONN;
+ newconn->inittime = newconn->actiontime = time(NULL);
+
+ newconn->clientheader = newconn->serverheader = FALSE;
+ newconn->simple_req = FALSE;
+
+ *rpConnptr = newconn;
+ newconn->next = connptr;
+
+ stats.num_cons++;
+
+ return newconn;
+}
+
+/*
+ * Delete a connection from the linked list
+ */
+int del_conn(struct conn_s *delconn)
+{
+ struct conn_s **rpConnptr = &connections;
+ struct conn_s *connptr = connections;
+
+ assert(delconn);
+
+ if (delconn->cbuffer) {
+ delete_buffer(delconn->cbuffer);
+ delconn->cbuffer = NULL;
+ }
+ if (delconn->sbuffer) {
+ delete_buffer(delconn->sbuffer);
+ delconn->sbuffer = NULL;
+ }
+
+ close(delconn->client_fd);
+ close(delconn->server_fd);
+
+ while (connptr && (connptr != delconn)) {
+ rpConnptr = &connptr->next;
+ connptr = connptr->next;
+ }
+
+ if (connptr == delconn) {
+ *rpConnptr = delconn->next;
+ safefree(delconn);
+ }
+
+ return 0;
+}
+
+/*
+ * Check for connections that have been idle too long
+ */
+void conncoll(void)
+{
+ struct conn_s *connptr = connections;
+
+ while (connptr) {
+ if (
+ (difftime(time(NULL), connptr->actiontime) >
+ STALECONN_TIME) && connptr->type != FINISHCONN) {
+ connptr->type = FINISHCONN;
+ stats.num_idles++;
+ }
+ connptr = connptr->next;
+ }
+}
+
+/*
+ * Actually remove all entries in the linked list that have been marked for
+ * deletion.
+ */
+void garbcoll(void)
+{
+ struct conn_s *connptr = connections;
+ struct conn_s *tmp;
+
+ static unsigned int dnscount = 0;
+
+#ifdef __DEBUG__
+ log("Garbage collecting (%lu)", stats.num_garbage);
+#endif
+
+ stats.num_garbage++;
+
+ while (connptr) {
+ tmp = connptr->next;
+ if (connptr->type == FINISHCONN) {
+#ifdef __DEBUG__
+ log("Deleting connection: %p", connptr);
+#endif
+ del_conn(connptr);
+ }
+ connptr = tmp;
+ }
+
+ if (dnscount++ > DNS_GARBAGE_COL) {
+ dnscount = 0;
+ dnsclean();
+ }
+}
diff --git a/src/conns.h b/src/conns.h
new file mode 100644
index 0000000..05cfdb5
--- /dev/null
+++ b/src/conns.h
@@ -0,0 +1,59 @@
+/* $Id: conns.h,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * See 'conns.c' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _CONN_H_
+#define _CONN_H_ 1
+
+#include "tinyproxy.h"
+
+#include <adns.h>
+
+/* Different connection types */
+enum conn_type {
+ NEWCONN,
+ WAITCONN,
+ DNS_WAITCONN,
+ RELAYCONN,
+ CLOSINGCONN,
+ FINISHCONN
+};
+
+struct conn_s {
+ struct conn_s *next;
+ int client_fd, server_fd;
+ enum conn_type type;
+ struct buffer_s *cbuffer;
+ struct buffer_s *sbuffer;
+ time_t inittime, actiontime;
+ flag clientheader, serverheader;
+ flag simple_req;
+
+ adns_query adns_qu;
+ char *domain;
+ int port_no;
+};
+
+extern struct conn_s *connections;
+
+/* Handle the creation and deletion of connections */
+extern struct conn_s *new_conn(int fd);
+extern int del_conn(struct conn_s *connptr);
+extern void conncoll(void);
+extern void garbcoll(void);
+
+#endif
diff --git a/src/defines.h.in b/src/defines.h.in
new file mode 100644
index 0000000..b7202fe
--- /dev/null
+++ b/src/defines.h.in
@@ -0,0 +1,89 @@
+/* src/defines.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define to empty if the keyword does not work. */
+#undef const
+
+/* Define if you don't have vprintf but do have _doprnt. */
+#undef HAVE_DOPRNT
+
+/* Define if you have the strftime function. */
+#undef HAVE_STRFTIME
+
+/* Define if you have the vprintf function. */
+#undef HAVE_VPRINTF
+
+/* Define as __inline if that's what the C compiler calls it. */
+#undef inline
+
+/* Define as the return type of signal handlers (int or void). */
+#undef RETSIGTYPE
+
+/* Define to `unsigned' if <sys/types.h> doesn't define. */
+#undef size_t
+
+/* Define if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/*
+ * Define if you want to have the peer's IP address to be included in a
+ * XTinyproxy header sent to the server.
+ */
+#undef XTINYPROXY
+
+/* chris - undef'd here so that they can be set up in the configure script */
+#undef DEFAULT_LOG
+#undef DEFAULT_PORT
+#undef DEFAULT_USER
+
+/*
+ * Define if you would like to include filtering code.
+ */
+#undef FILTER_ENABLE
+
+/*
+ * Define if you want to use the included GNU regex routine
+ */
+#undef USE_GNU_REGEX
+
+/* Define if you have the regcomp function. */
+#undef HAVE_REGCOMP
+
+/* Define if you have the select function. */
+#undef HAVE_SELECT
+
+/* Define if you have the socket function. */
+#undef HAVE_SOCKET
+
+/* Define if you have the strdup function. */
+#undef HAVE_STRDUP
+
+/* Define if you have the strerror function. */
+#undef HAVE_STRERROR
+
+/* Define if you have the vsnprintf function. */
+#undef HAVE_VSNPRINTF
+
+/* Define if you have the vsyslog function. */
+#undef HAVE_VSYSLOG
+
+/* Define if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define if you have the <syslog.h> header file. */
+#undef HAVE_SYSLOG_H
+
+/* Define if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Version number of package */
+#undef VERSION
+
diff --git a/src/dnscache.c b/src/dnscache.c
new file mode 100644
index 0000000..ecd7cee
--- /dev/null
+++ b/src/dnscache.c
@@ -0,0 +1,198 @@
+/* $Id: dnscache.c,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * This is a caching DNS system. When a host name is needed we look it up here
+ * and see if there is already an answer for it. The domains are placed in a
+ * hashed linked list. If the name is not here, then we need to look it up and
+ * add it to the system. This really speeds up the connection to servers since
+ * the DNS name does not need to be looked up each time. It's kind of cool. :)
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ * Copyright (C) 2000 Chris Lightfoot (chris@ex-parrot.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "utils.h"
+#include "dnscache.h"
+
+#define HASH_BOX 25
+#define DNSEXPIRE (5 * 60)
+
+struct dnscache_s {
+ struct in_addr ipaddr;
+ char *domain;
+ time_t expire;
+ struct dnscache_s *next;
+};
+
+struct dnscache_s *cache[HASH_BOX];
+
+static unsigned int hash(unsigned char *keystr, unsigned int box)
+{
+ unsigned long hashc = 0;
+ unsigned char ch;
+
+ assert(keystr);
+ assert(box > 0);
+
+ while ((ch = *keystr++))
+ hashc += tolower(ch);
+
+ return hashc % box;
+}
+
+int lookup(struct in_addr *addr, char *domain)
+{
+ unsigned int box = hash(domain, HASH_BOX);
+ struct dnscache_s **rptr = &cache[box];
+ struct dnscache_s *ptr = cache[box];
+
+ assert(addr);
+ assert(domain);
+
+ while (ptr && strcasecmp(ptr->domain, domain)) {
+ rptr = &ptr->next;
+ ptr = ptr->next;
+ }
+
+ if (ptr && !strcasecmp(ptr->domain, domain)) {
+ /* Woohoo... found it. Make sure it hasn't expired */
+ if (difftime(time(NULL), ptr->expire) > DNSEXPIRE) {
+ /* Oops... expired */
+ *rptr = ptr->next;
+ safefree(ptr->domain);
+ safefree(ptr);
+ return -1;
+ }
+
+ /* chris - added this so that the routine can be used to just
+ * look stuff up.
+ */
+ if (addr) *addr = ptr->ipaddr;
+ return 0;
+ }
+
+ return -1;
+}
+
+int insert(struct in_addr *addr, char *domain)
+{
+ unsigned int box = hash(domain, HASH_BOX);
+ struct dnscache_s **rptr = &cache[box];
+ struct dnscache_s *ptr = cache[box];
+ struct dnscache_s *newptr;
+
+ assert(addr);
+ assert(domain);
+
+ while (ptr) {
+ rptr = &ptr->next;
+ ptr = ptr->next;
+ }
+
+ if (!(newptr = xmalloc(sizeof(struct dnscache_s)))) {
+ return -1;
+ }
+
+ if (!(newptr->domain = xstrdup(domain))) {
+ safefree(newptr);
+ return -1;
+ }
+
+ newptr->ipaddr = *addr;
+
+ newptr->expire = time(NULL);
+
+ *rptr = newptr;
+ newptr->next = ptr;
+
+ return 0;
+}
+
+int dnscache(struct in_addr *addr, char *domain)
+{
+ struct hostent *resolv;
+
+ assert(addr);
+ assert(domain);
+
+ if (inet_aton(domain, (struct in_addr *) addr) != 0)
+ return 0;
+
+ /* Well, we're not dotted-decimal so we need to look it up */
+ if (lookup(addr, domain) == 0)
+ return 0;
+
+ /* Okay, so not in the list... need to actually look it up. */
+ if (!(resolv = gethostbyname(domain)))
+ return -1;
+
+ memcpy(addr, resolv->h_addr_list[0], resolv->h_length);
+ insert(addr, domain);
+
+ return 0;
+}
+
+static void dnsdelete(unsigned int c, struct dnscache_s *del)
+{
+ struct dnscache_s **rptr;
+ struct dnscache_s *ptr;
+
+ assert(c > 0);
+ assert(del);
+
+ rptr = &cache[c];
+ ptr = cache[c];
+
+ while (ptr && (ptr != del)) {
+ rptr = &ptr->next;
+ ptr = ptr->next;
+ }
+
+ if (ptr == del) {
+ *rptr = ptr->next;
+ safefree(ptr->domain);
+ safefree(ptr);
+ }
+}
+
+void dnsclean(void)
+{
+ unsigned int c;
+ struct dnscache_s *ptr, *tmp;
+
+ for (c = 0; c < HASH_BOX; c++) {
+ ptr = cache[c];
+
+ while (ptr) {
+ tmp = ptr->next;
+
+ if (difftime(time(NULL), ptr->expire) > DNSEXPIRE)
+ dnsdelete(c, ptr);
+
+ ptr = tmp;
+ }
+ }
+}
diff --git a/src/dnscache.h b/src/dnscache.h
new file mode 100644
index 0000000..ecb3cd6
--- /dev/null
+++ b/src/dnscache.h
@@ -0,0 +1,38 @@
+/* $Id: dnscache.h,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * See 'dnscache.c' for a detailed description.
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _DNSCACHE_H_
+#define _DNSCACHE_H_ 1
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#define DNS_GARBAGE_COL 100
+
+extern int dnscache(struct in_addr *addr, char *domain);
+extern void dnsclean(void);
+
+/* chris - Access these from reqs.c because of ADNS. Ugly. */
+extern int lookup(struct in_addr *addr, char *domain);
+extern int insert(struct in_addr *addr, char *domain);
+
+#endif
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 0000000..4571b8a
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,150 @@
+/* $Id: filter.c,v 1.1.1.1 2000-02-16 17:32:18 sdyoung Exp $
+ *
+ * Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
+ *
+ * A substring of the domain to be filtered goes into the file
+ * pointed at by DEFAULT_FILTER.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "config.h"
+#include "utils.h"
+#include "tinyproxy.h"
+#include "filter.h"
+
+#include "regexp.h"
+#include <sysexits.h>
+
+static int err;
+
+struct filter_list {
+ struct filter_list *next;
+ char *pat;
+ regex_t *cpat;
+};
+
+
+static struct filter_list *fl = NULL;
+static int already_init = 0;
+
+/* initializes a linked list of strings containing hosts to be filtered */
+void filter_init(void)
+{
+ FILE *fd;
+ struct filter_list *p;
+ char buf[255];
+ char *s;
+
+ if (!fl && !already_init) {
+ fd = fopen(config.filter, "r");
+ if (fd) {
+ p = NULL;
+
+ while (fgets(buf, 255, (FILE *) fd)) {
+ s = buf;
+ if (!p) /* head of list */
+ fl = p = (struct filter_list *)
+ xmalloc(sizeof
+ (struct filter_list));
+ else { /* next entry */
+ p->next = (struct filter_list *)
+ xmalloc(sizeof
+ (struct filter_list));
+ p = p->next;
+ }
+ /* initialize space to zero */
+ memset(p, 0, sizeof(struct filter_list));
+
+ /* replace first whitespace with \0 */
+ while (*s++)
+ if (isspace((int) *s))
+ *s = '\0';
+
+ p->pat = xstrdup(buf);
+ p->cpat = xmalloc(sizeof(regex_t));
+ if (
+ (err =
+ regcomp(p->cpat, p->pat,
+ REG_NEWLINE | REG_NOSUB)) != 0) {
+ fprintf(stderr,
+ "Bad regex in %s: %s\n",
+ config.filter, p->pat);
+ exit(EX_DATAERR);
+ }
+ }
+ already_init = 1;
+ fclose(fd);
+ }
+ }
+}
+
+/* unlink the list */
+void filter_destroy(void)
+{
+ struct filter_list *p, *q;
+
+ if (already_init) {
+ for (p = q = fl; p; p = q) {
+ regfree(p->cpat);
+ safefree(p->cpat);
+ safefree(p->pat);
+ q = p->next;
+ safefree(p);
+ }
+ fl = NULL;
+ already_init = 0;
+ }
+}
+
+/* returns 0 if host is not an element of filter list, non-zero otherwise */
+int filter_host(char *host)
+{
+ struct filter_list *p;
+ char *s, *port;
+ int result;
+
+ assert(host);
+
+ if (!fl || !already_init)
+ return (0);
+
+ /* strip off the port number */
+ s = xstrdup(host);
+ port = strchr(s, ':');
+ if (port)
+ *port = '\0';
+
+ result = 0;
+
+ for (p = fl; p; p = p->next) {
+ result = !regexec(p->cpat, s, (size_t) 0, (regmatch_t *) 0, 0);
+
+ if (result)
+ break;
+ }
+ safefree(s);
+ return (result);
+}
diff --git a/src/filter.h b/src/filter.h
new file mode 100644
index 0000000..fb08f2f
--- /dev/null
+++ b/src/filter.h
@@ -0,0 +1,25 @@
+/* $Id: filter.h,v 1.1.1.1 2000-02-16 17:32:24 sdyoung Exp $
+ *
+ * See 'filter.c' for a detailed description.
+ *
+ * Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __FILTER_H_
+#define __FILTER_H_ 1
+
+extern void filter_init(void);
+extern void filter_destroy(void);
+extern int filter_host(char *host);
+
+#endif
diff --git a/src/gnuregex.c b/src/gnuregex.c
new file mode 100644
index 0000000..785d419
--- /dev/null
+++ b/src/gnuregex.c
@@ -0,0 +1,5880 @@
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
+ internationalization features.)
+ Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined _AIX && !defined REGEX_MALLOC
+ #pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#ifdef HAVE_CONFIG_H
+# include <defines.h> /* Tinyproxy uses defines.h for autoconf defs */
+#endif
+
+#ifndef PARAMS
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif /* GCC. */
+#endif /* Not PARAMS. */
+
+#if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
+#else
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+# include <sys/types.h>
+#endif
+
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
+
+#else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+# undef REL_ALLOC
+
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
+char *malloc ();
+char *realloc ();
+# endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+# ifndef Sword
+# define Sword 1
+# endif
+
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
+
+/* How many characters in the character set. */
+# define CHAR_SET_SIZE 256
+
+# ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+# else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void
+init_syntax_once ()
+{
+ register int c;
+ static int done;
+
+ if (done)
+ return;
+
+ bzero (re_syntax_table, sizeof re_syntax_table);
+
+ for (c = 'a'; c <= 'z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = 'A'; c <= 'Z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = '0'; c <= '9'; c++)
+ re_syntax_table[c] = Sword;
+
+ re_syntax_table['_'] = Sword;
+
+ done = 1;
+}
+
+# endif /* not SYNTAX_TABLE */
+
+# define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "gnuregex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+#else
+# define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+#endif
+
+#undef ISPRINT
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
+#ifndef NULL
+# define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+# ifndef alloca
+
+/* Make alloca work the best possible way. */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
+
+# endif /* not alloca */
+
+# define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+# define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ memcpy (destination, source, osize))
+
+/* No need to do anything to free, after alloca. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined REL_ALLOC && defined REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+# define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+# ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
+
+# else /* not REGEX_MALLOC */
+
+# define REGEX_ALLOCATE_STACK alloca
+
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+# define REGEX_FREE_STACK(arg)
+
+# endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop));
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
+static void
+extract_number (dest, source)
+ int *dest;
+ unsigned char *source;
+{
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
+}
+
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+static void extract_number_and_incr _RE_ARGS ((int *destination,
+ unsigned char **source));
+static void
+extract_number_and_incr (destination, source)
+ int *destination;
+ unsigned char **source;
+{
+ extract_number (destination, *source);
+ *source += 2;
+}
+
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+# include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+# include <assert.h>
+
+static int debug;
+
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form. */
+
+void
+print_fastmap (fastmap)
+ char *fastmap;
+{
+ unsigned was_a_range = 0;
+ unsigned i = 0;
+
+ while (i < (1 << BYTEWIDTH))
+ {
+ if (fastmap[i++])
+ {
+ was_a_range = 0;
+ putchar (i - 1);
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++;
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ putchar (i - 1);
+ }
+ }
+ }
+ putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+ the START pointer into it and ending just before the pointer END. */
+
+void
+print_partial_compiled_pattern (start, end)
+ unsigned char *start;
+ unsigned char *end;
+{
+ int mcnt, mcnt2;
+ unsigned char *p1;
+ unsigned char *p = start;
+ unsigned char *pend = end;
+
+ if (start == NULL)
+ {
+ printf ("(null)\n");
+ return;
+ }
+
+ /* Loop over pattern commands. */
+ while (p < pend)
+ {
+ printf ("%d:\t", p - start);
+
+ switch ((re_opcode_t) *p++)
+ {
+ case no_op:
+ printf ("/no_op");
+ break;
+
+ case exactn:
+ mcnt = *p++;
+ printf ("/exactn/%d", mcnt);
+ do
+ {
+ putchar ('/');
+ putchar (*p++);
+ }
+ while (--mcnt);
+ break;
+
+ case start_memory:
+ mcnt = *p++;
+ printf ("/start_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case stop_memory:
+ mcnt = *p++;
+ printf ("/stop_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case duplicate:
+ printf ("/duplicate/%d", *p++);
+ break;
+
+ case anychar:
+ printf ("/anychar");
+ break;
+
+ case charset:
+ case charset_not:
+ {
+ register int c, last = -100;
+ register int in_range = 0;
+
+ printf ("/charset [%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+ assert (p + *p < pend);
+
+ for (c = 0; c < 256; c++)
+ if (c / 8 < *p
+ && (p[1 + (c/8)] & (1 << (c % 8))))
+ {
+ /* Are we starting a range? */
+ if (last + 1 == c && ! in_range)
+ {
+ putchar ('-');
+ in_range = 1;
+ }
+ /* Have we broken a range? */
+ else if (last + 1 != c && in_range)
+ {
+ putchar (last);
+ in_range = 0;
+ }
+
+ if (! in_range)
+ putchar (c);
+
+ last = c;
+ }
+
+ if (in_range)
+ putchar (last);
+
+ putchar (']');
+
+ p += 1 + *p;
+ }
+ break;
+
+ case begline:
+ printf ("/begline");
+ break;
+
+ case endline:
+ printf ("/endline");
+ break;
+
+ case on_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_jump to %d", p + mcnt - start);
+ break;
+
+ case on_failure_keep_string_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
+ break;
+
+ case dummy_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/dummy_failure_jump to %d", p + mcnt - start);
+ break;
+
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
+ case maybe_pop_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/maybe_pop_jump to %d", p + mcnt - start);
+ break;
+
+ case pop_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/pop_failure_jump to %d", p + mcnt - start);
+ break;
+
+ case jump_past_alt:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump_past_alt to %d", p + mcnt - start);
+ break;
+
+ case jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump to %d", p + mcnt - start);
+ break;
+
+ case succeed_n:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
+ break;
+
+ case jump_n:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
+ break;
+
+ case set_number_at:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
+ break;
+
+ case wordbound:
+ printf ("/wordbound");
+ break;
+
+ case notwordbound:
+ printf ("/notwordbound");
+ break;
+
+ case wordbeg:
+ printf ("/wordbeg");
+ break;
+
+ case wordend:
+ printf ("/wordend");
+
+# ifdef emacs
+ case before_dot:
+ printf ("/before_dot");
+ break;
+
+ case at_dot:
+ printf ("/at_dot");
+ break;
+
+ case after_dot:
+ printf ("/after_dot");
+ break;
+
+ case syntaxspec:
+ printf ("/syntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+
+ case notsyntaxspec:
+ printf ("/notsyntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+# endif /* emacs */
+
+ case wordchar:
+ printf ("/wordchar");
+ break;
+
+ case notwordchar:
+ printf ("/notwordchar");
+ break;
+
+ case begbuf:
+ printf ("/begbuf");
+ break;
+
+ case endbuf:
+ printf ("/endbuf");
+ break;
+
+ default:
+ printf ("?%d", *(p-1));
+ }
+
+ putchar ('\n');
+ }
+
+ printf ("%d:\tend of pattern.\n", p - start);
+}
+
+
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ printf ("%ld bytes used/%ld bytes allocated.\n",
+ bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %lx\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
+}
+
+
+void
+print_double_string (where, string1, size1, string2, size2)
+ const char *where;
+ const char *string1;
+ const char *string2;
+ int size1;
+ int size2;
+{
+ int this_char;
+
+ if (where == NULL)
+ printf ("(null)");
+ else
+ {
+ if (FIRST_STRING_P (where))
+ {
+ for (this_char = where - string1; this_char < size1; this_char++)
+ putchar (string1[this_char]);
+
+ where = string2;
+ }
+
+ for (this_char = where - string2; this_char < size2; this_char++)
+ putchar (string2[this_char]);
+ }
+}
+
+void
+printchar (c)
+ int c;
+{
+ putc (c, stderr);
+}
+
+#else /* not DEBUG */
+
+# undef assert
+# define assert(e)
+
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+ reg_syntax_t syntax;
+{
+ reg_syntax_t ret = re_syntax_options;
+
+ re_syntax_options = syntax;
+#ifdef DEBUG
+ if (syntax & RE_DEBUG)
+ debug = 1;
+ else if (debug) /* was on but now is not */
+ debug = 0;
+#endif /* DEBUG */
+ return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+static const char re_error_msgid[] =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+static const size_t re_error_msgid_idx[] =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+# undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+#ifndef INIT_FAILURE_ALLOC
+# define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+
+#ifdef INT_IS_16BIT
+
+# if defined MATCH_MAY_ALLOCATE
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+long int re_max_failures = 4000;
+# else
+long int re_max_failures = 2000;
+# endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ long int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned long int size;
+ unsigned long int avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#else /* not INT_IS_16BIT */
+
+# if defined MATCH_MAY_ALLOCATE
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+int re_max_failures = 20000;
+# else
+int re_max_failures = 2000;
+# endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#endif /* INT_IS_16BIT */
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails. */
+
+#ifdef MATCH_MAY_ALLOCATE
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so. */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
+ 1))
+
+/* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging. */
+#ifdef DEBUG
+# define DEBUG_PUSH PUSH_FAILURE_INT
+# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+#else
+# define DEBUG_PUSH(item)
+# define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
+ be declared.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ active_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers. */
+#ifdef DEBUG
+# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+# define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. */
+/* We used to use (num_regs - 1), which is the number of registers
+ this regexp will save; but that was changed to 5
+ to avoid stack overflow for a regexp with lots of parens. */
+#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. */
+#define NUM_FAILURE_ITEMS \
+ (((0 \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAIL_STACK pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the highest and lowest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+typedef union
+{
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
+#define SET_REGS_MATCHED() \
+ do \
+ { \
+ if (!set_regs_matched_done) \
+ { \
+ active_reg_t r; \
+ set_regs_matched_done = 1; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ } \
+ while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched. */
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile. */
+
+static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
+static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2));
+static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg, unsigned char *end));
+static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2, unsigned char *end));
+static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
+ reg_syntax_t syntax));
+static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
+ reg_syntax_t syntax));
+static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
+ const char *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ unsigned char *b));
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). */
+#ifndef PATFETCH
+# define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+#endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+#ifndef TRANSLATE
+# define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (int) ((to) - (loc) - 3))
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+/* Any other compiler which, like MSC, has allocation limit below 2^16
+ bytes will have to use approach similar to what was done below for
+ MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes. Such thing is not going to work too well.
+ You have been warned!! */
+#if defined _MSC_VER && !defined WIN32
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
+#else
+# define MAX_BUF_SIZE (1L << 16)
+# define REALLOC(p,s) realloc ((p), (s))
+#endif
+
+/* Extend the buffer by twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. If extending the buffer results in it
+ being larger than MAX_BUF_SIZE, then flag memory exhausted. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
+ if (bufp->buffer == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
+
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendement 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
+#else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#endif
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller. */
+
+static
+regex_grow_registers (num_regs)
+ int num_regs;
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (reg_info, num_regs, register_info_type);
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
+ compile_stack,
+ regnum_t regnum));
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+/* Return, freeing storage we allocated. */
+#define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ size_t size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ putchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined emacs && !defined SYNTAX_TABLE
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_upper = STREQ (str, "upper");
+ wctype_t wt;
+ int ch;
+
+ wt = IS_CHAR_CLASS (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ {
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
+ if (iswctype (btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# endif
+
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+
+ had_char_class = true;
+#else
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ if ( translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+#endif /* libc || wctype.h */
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_EBRACE);
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ FREE_STACK_RETURN (REG_ESUBREG);
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, (regnum_t) c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN (REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH (succeed);
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: \n");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+# ifdef emacs
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+# else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+# endif /* not emacs */
+ }
+
+ regex_grow_registers (num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Store OP at LOC followed by two-byte integer parameter ARG. */
+
+static void
+store_op1 (op, loc, arg)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
+{
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg);
+}
+
+
+/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
+{
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg1);
+ STORE_NUMBER (loc + 3, arg2);
+}
+
+
+/* Copy the bytes from LOC to END to open up three bytes of space at LOC
+ for OP followed by two-byte integer parameter ARG. */
+
+static void
+insert_op1 (op, loc, arg, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
+ unsigned char *end;
+{
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 3;
+
+ while (pfrom != loc)
+ *--pto = *--pfrom;
+
+ store_op1 (op, loc, arg);
+}
+
+
+/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
+ unsigned char *end;
+{
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 5;
+
+ while (pfrom != loc)
+ *--pto = *--pfrom;
+
+ store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points to just after a ^ in PATTERN. Return true if that ^ comes
+ after an alternative or a begin-subexpression. We assume there is at
+ least one character before the ^. */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+ const char *pattern, *p;
+ reg_syntax_t syntax;
+{
+ const char *prev = p - 2;
+ boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+
+ return
+ /* After a subexpression? */
+ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
+ /* After an alternative? */
+ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
+}
+
+
+/* The dual of at_begline_loc_p. This one is for $. We assume there is
+ at least one character after the $, i.e., `P < PEND'. */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+ const char *p, *pend;
+ reg_syntax_t syntax;
+{
+ const char *next = p;
+ boolean next_backslash = *next == '\\';
+ const char *next_next = p + 1 < pend ? p + 1 : 0;
+
+ return
+ /* Before a subexpression? */
+ (syntax & RE_NO_BK_PARENS ? *next == ')'
+ : next_backslash && next_next && *next_next == ')')
+ /* Before an alternative? */
+ || (syntax & RE_NO_BK_VBAR ? *next == '|'
+ : next_backslash && next_next && *next_next == '|');
+}
+
+
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
+ false if it's not. */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+ compile_stack_type compile_stack;
+ regnum_t regnum;
+{
+ int this_element;
+
+ for (this_element = compile_stack.avail - 1;
+ this_element >= 0;
+ this_element--)
+ if (compile_stack.stack[this_element].regnum == regnum)
+ return true;
+
+ return false;
+}
+
+
+/* Read the ending character of a range (in a bracket expression) from the
+ uncompiled pattern *P_PTR (which ends at PEND). We assume the
+ starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
+ Then we set the translation of all bits between the starting and
+ ending characters (inclusive) in the compiled pattern B.
+
+ Return an error code.
+
+ We use these short variable names so we can use the same macros as
+ `regex_compile' itself. */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+ const char **p_ptr, *pend;
+ RE_TRANSLATE_TYPE translate;
+ reg_syntax_t syntax;
+ unsigned char *b;
+{
+ unsigned this_char;
+
+ const char *p = *p_ptr;
+ unsigned int range_start, range_end;
+
+ if (p == pend)
+ return REG_ERANGE;
+
+ /* Even though the pattern is a signed `char *', we need to fetch
+ with unsigned char *'s; if the high bit of the pattern character
+ is set, the range endpoints will be negative if we fetch using a
+ signed char *.
+
+ We also want to fetch the endpoints without translating them; the
+ appropriate translation is done in the bit-setting loop below. */
+ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
+ range_start = ((const unsigned char *) p)[-2];
+ range_end = ((const unsigned char *) p)[0];
+
+ /* Have to increment the pointer into the pattern string, so the
+ caller isn't still at the ending character. */
+ (*p_ptr)++;
+
+ /* If the start is after the end, the range is empty. */
+ if (range_start > range_end)
+ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+ /* Here we see why `this_char' has to be larger than an `unsigned
+ char' -- the range is inclusive, so if `range_end' == 0xff
+ (assuming 8-bit characters), we would otherwise go into an infinite
+ loop, since all characters <= 0xff. */
+ for (this_char = range_start; this_char <= range_end; this_char++)
+ {
+ SET_LIST_BIT (TRANSLATE (this_char));
+ }
+
+ return REG_NOERROR;
+}
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ Returns 0 if we succeed, -2 if an internal error. */
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ int j, k;
+#ifdef MATCH_MAY_ALLOCATE
+ fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+ char *destination;
+#endif
+
+ register char *fastmap = bufp->fastmap;
+ unsigned char *pattern = bufp->buffer;
+ unsigned char *p = pattern;
+ register unsigned char *pend = pattern + bufp->used;
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (1)
+ {
+ if (p == pend || *p == succeed)
+ {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY ())
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail].pointer;
+
+ continue;
+ }
+ else
+ break;
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ goto done;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+ case exactn:
+ fastmap[p[1]] = 1;
+ break;
+
+
+ case charset:
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ {
+ int fastmap_newline = fastmap['\n'];
+
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (j > 0)
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) *p != on_failure_jump
+ && (re_opcode_t) *p != succeed_n)
+ continue;
+
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend)
+ {
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
+ {
+ RESET_FAIL_STACK ();
+ return -2;
+ }
+ }
+ else
+ bufp->can_be_null = 1;
+
+ if (succeed_n_p)
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += 2;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 4;
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ p += 4;
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ p += 2;
+ continue;
+
+
+ default:
+ abort (); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+
+ done:
+ RESET_FAIL_STACK ();
+ return 0;
+} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+ struct re_pattern_buffer *bufp;
+ struct re_registers *regs;
+ unsigned num_regs;
+ regoff_t *starts, *ends;
+{
+ if (num_regs)
+ {
+ bufp->regs_allocated = REGS_REALLOCATE;
+ regs->num_regs = num_regs;
+ regs->start = starts;
+ regs->end = ends;
+ }
+ else
+ {
+ bufp->regs_allocated = REGS_UNALLOCATED;
+ regs->num_regs = 0;
+ regs->start = regs->end = (regoff_t *) 0;
+ }
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Searching routines. */
+
+/* Like re_search_2, below, but only one string is specified, and
+ doesn't let you say where to stop matching. */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int size, startpos, range;
+ struct re_registers *regs;
+{
+ return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+ regs, size);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+ virtual concatenation of STRING1 and STRING2, starting first at index
+ STARTPOS, then at STARTPOS + 1, and so on.
+
+ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+ RANGE is how far to scan while trying to match. RANGE = 0 means try
+ only at STARTPOS; in general, the last start tried is STARTPOS +
+ RANGE.
+
+ In REGS, return the indices of the virtual concatenation of STRING1
+ and STRING2 that matched the entire BUFP->buffer and its contained
+ subexpressions.
+
+ Do not consider matching one past the index STOP in the virtual
+ concatenation of STRING1 and STRING2.
+
+ We return either the position in the strings at which the match was
+ found, -1 if no match, or -2 if error (such as failure
+ stack overflow). */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+ int stop;
+{
+ int val;
+ register char *fastmap = bufp->fastmap;
+ register RE_TRANSLATE_TYPE translate = bufp->translate;
+ int total_size = size1 + size2;
+ int endpos = startpos + range;
+
+ /* Check for out-of-range STARTPOS. */
+ if (startpos < 0 || startpos > total_size)
+ return -1;
+
+ /* Fix up RANGE if it might eventually take us outside
+ the virtual concatenation of STRING1 and STRING2.
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
+ if (endpos < 0)
+ range = 0 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* If the search isn't to be a backwards one, don't waste time in a
+ search for a pattern that must be anchored. */
+ if (bufp->used > 0 && range > 0
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
+ /* `begline' is like `begbuf' if it cannot match at newlines. */
+ || ((re_opcode_t) bufp->buffer[0] == begline
+ && !bufp->newline_anchor)))
+ {
+ if (startpos > 0)
+ return -1;
+ else
+ range = 1;
+ }
+
+#ifdef emacs
+ /* In a forward search for something that starts with \=.
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = PT - startpos;
+ if (range <= 0)
+ return -1;
+ }
+#endif /* emacs */
+
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !bufp->fastmap_accurate)
+ if (re_compile_fastmap (bufp) == -2)
+ return -2;
+
+ /* Loop through the string, looking for a place to start matching. */
+ for (;;)
+ {
+ /* If a fastmap is supplied, skip quickly over characters that
+ cannot be the start of a match. If the pattern can match the
+ null string, however, we don't need to skip characters; we want
+ the first null string. */
+ if (fastmap && startpos < total_size && !bufp->can_be_null)
+ {
+ if (range > 0) /* Searching forwards. */
+ {
+ register const char *d;
+ register int lim = 0;
+ int irange = range;
+
+ if (startpos < size1 && startpos + range >= size1)
+ lim = range - (size1 - startpos);
+
+ d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+ /* Written out as an if-else to avoid testing `translate'
+ inside the loop. */
+ if (translate)
+ while (range > lim
+ && !fastmap[(unsigned char)
+ translate[(unsigned char) *d++]])
+ range--;
+ else
+ while (range > lim && !fastmap[(unsigned char) *d++])
+ range--;
+
+ startpos += irange - range;
+ }
+ else /* Searching backwards. */
+ {
+ register char c = (size1 == 0 || startpos >= size1
+ ? string2[startpos - size1]
+ : string1[startpos]);
+
+ if (!fastmap[(unsigned char) TRANSLATE (c)])
+ goto advance;
+ }
+ }
+
+ /* If can't match the null string, and that's all we have left, fail. */
+ if (range >= 0 && startpos == total_size && fastmap
+ && !bufp->can_be_null)
+ return -1;
+
+ val = re_match_2_internal (bufp, string1, size1, string2, size2,
+ startpos, regs, stop);
+#ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
+ alloca (0);
+# endif
+#endif
+
+ if (val >= 0)
+ return startpos;
+
+ if (val == -2)
+ return -2;
+
+ advance:
+ if (!range)
+ break;
+ else if (range > 0)
+ {
+ range--;
+ startpos++;
+ }
+ else
+ {
+ range++;
+ startpos--;
+ }
+ }
+ return -1;
+} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2' into an offset from the beginning of that string. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
+
+/* Macros for dealing with the split strings in re_match_2. */
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+
+/* Free everything we malloc. */
+#ifdef MATCH_MAY_ALLOCATE
+# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
+# define FREE_VARIABLES() \
+ do { \
+ REGEX_FREE_STACK (fail_stack.stack); \
+ FREE_VAR (regstart); \
+ FREE_VAR (regend); \
+ FREE_VAR (old_regstart); \
+ FREE_VAR (old_regend); \
+ FREE_VAR (best_regstart); \
+ FREE_VAR (best_regend); \
+ FREE_VAR (reg_info); \
+ FREE_VAR (reg_dummy); \
+ FREE_VAR (reg_info_dummy); \
+ } while (0)
+#else
+# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* re_match is like re_match_2 except it takes only a single string. */
+
+int
+re_match (bufp, string, size, pos, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int size, pos;
+ struct re_registers *regs;
+{
+ int result = re_match_2_internal (bufp, NULL, 0, string, size,
+ pos, regs, size);
+# ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
+ alloca (0);
+# endif
+# endif
+ return result;
+}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
+#endif /* not emacs */
+
+static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
+ int len, char *translate));
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+ pos, regs, stop);
+#ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
+ alloca (0);
+# endif
+#endif
+ return result;
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ unsigned char *just_past_start_mem = 0;
+
+ /* We use this to map every character in the string. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+ static unsigned failure_id;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ size_t num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **regstart, **regend;
+#endif
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **old_regstart, **old_regend;
+#endif
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **best_regstart, **best_regend;
+#endif
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
+ /* Used when we pop values we don't care about. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+#ifdef MATCH_MAY_ALLOCATE
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* MATCH_MAY_ALLOCATE */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is:\n");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set || best_match_p)
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ succeed_label:
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ }
+ else
+ {
+ /* These braces fend off a "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert (bufp->regs_allocated == REGS_FIXED);
+ }
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
+ mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ FREE_VARIABLES ();
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+ case succeed:
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");
+ goto succeed_label;
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ just_past_start_mem = p;
+
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
+ r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if (old_regend[r] >= regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : memcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+
+ /* Do this because we've match some characters. */
+ SET_REGS_MATCHED ();
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+#endif
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+#endif
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ else if ((re_opcode_t) *p2 == charset)
+ {
+ /* We win if the first character of the loop is not part
+ of the charset. */
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
+ && (p2[2 + p1[5] / BYTEWIDTH]
+ & (1 << (p1[5] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < (int) p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ active_reg_t dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+ unconditional_jump:
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+ /* Note fall through. */
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+#ifdef _LIBC
+ DEBUG_PRINT2 ("(to %p).\n", p);
+#else
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+#endif
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
+#endif
+ }
+ else if (mcnt == 0)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
+#else
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+#endif
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
+#endif
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+#endif
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+#if 0
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+ macro and introducing temporary variables works around the bug. */
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+#else
+ case wordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ break;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ break;
+ goto fail;
+ }
+
+ case notwordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ goto fail;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ goto fail;
+ break;
+ }
+#endif
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt > 0 */
+ break;
+
+
+ case stop_memory:
+ assert (p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups. */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+ unsigned char *p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ unsigned char *p1 = p;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and break when we get
+ to one that can't. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* It's a loop. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ break;
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Sets P to one after the op and its arguments, if any. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((re_opcode_t) *p1++)
+ {
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate:
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4;
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+ bytes; nonzero otherwise. */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+ const char *s1, *s2;
+ register int len;
+ RE_TRANSLATE_TYPE translate;
+{
+ register const unsigned char *p1 = (const unsigned char *) s1;
+ register const unsigned char *p2 = (const unsigned char *) s2;
+ while (len)
+ {
+ if (translate[*p1++] != translate[*p2++]) return 1;
+ len--;
+ }
+ return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry.
+
+ We call regex_compile to do the actual compilation. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+ const char *pattern;
+ size_t length;
+ struct re_pattern_buffer *bufp;
+{
+ reg_errcode_t ret;
+
+ /* GNU code is written to assume at least RE_NREGS registers will be set
+ (and at least one extra will be -1). */
+ bufp->regs_allocated = REGS_UNALLOCATED;
+
+ /* And GNU code determines whether or not to get register information
+ by passing null for the REGS argument to re_match, etc., not by
+ setting no_sub. */
+ bufp->no_sub = 0;
+
+ /* Match anchors at newline. */
+ bufp->newline_anchor = 1;
+
+ ret = regex_compile (pattern, length, re_syntax_options, bufp);
+
+ if (!ret)
+ return NULL;
+ return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec below without link errors. */
+weak_function
+#endif
+re_comp (s)
+ const char *s;
+{
+ reg_errcode_t ret;
+
+ if (!s)
+ {
+ if (!re_comp_buf.buffer)
+ return gettext ("No previous regular expression");
+ return 0;
+ }
+
+ if (!re_comp_buf.buffer)
+ {
+ re_comp_buf.buffer = (unsigned char *) malloc (200);
+ if (re_comp_buf.buffer == NULL)
+ return (char *) gettext (re_error_msgid
+ + re_error_msgid_idx[(int) REG_ESPACE]);
+ re_comp_buf.allocated = 200;
+
+ re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+ if (re_comp_buf.fastmap == NULL)
+ return (char *) gettext (re_error_msgid
+ + re_error_msgid_idx[(int) REG_ESPACE]);
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+ if (!ret)
+ return NULL;
+
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+ return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
+}
+
+
+int
+#ifdef _LIBC
+weak_function
+#endif
+re_exec (s)
+ const char *s;
+{
+ const int len = strlen (s);
+ return
+ 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
+}
+
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we considers upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+ regex_t *preg;
+ const char *pattern;
+ int cflags;
+{
+ reg_errcode_t ret;
+ reg_syntax_t syntax
+ = (cflags & REG_EXTENDED) ?
+ RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+ /* regex_compile will allocate the space for the compiled pattern. */
+ preg->buffer = 0;
+ preg->allocated = 0;
+ preg->used = 0;
+
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
+
+ if (cflags & REG_ICASE)
+ {
+ unsigned i;
+
+ preg->translate
+ = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+ * sizeof (*(RE_TRANSLATE_TYPE)0));
+ if (preg->translate == NULL)
+ return (int) REG_ESPACE;
+
+ /* Map uppercase characters to corresponding lowercase ones. */
+ for (i = 0; i < CHAR_SET_SIZE; i++)
+ preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
+ }
+ else
+ preg->translate = NULL;
+
+ /* If REG_NEWLINE is set, newlines are treated differently. */
+ if (cflags & REG_NEWLINE)
+ { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+ syntax &= ~RE_DOT_NEWLINE;
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+ /* It also changes the matching behavior. */
+ preg->newline_anchor = 1;
+ }
+ else
+ preg->newline_anchor = 0;
+
+ preg->no_sub = !!(cflags & REG_NOSUB);
+
+ /* POSIX says a null character in the pattern terminates it, so we
+ can use strlen here in compiling the pattern. */
+ ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+ /* POSIX doesn't distinguish between an unmatched open-group and an
+ unmatched close-group: both are REG_EPAREN. */
+ if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occured while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
+
+ return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+ const regex_t *preg;
+ const char *string;
+ size_t nmatch;
+ regmatch_t pmatch[];
+ int eflags;
+{
+ int ret;
+ struct re_registers regs;
+ regex_t private_preg;
+ int len = strlen (string);
+ boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+ private_preg = *preg;
+
+ private_preg.not_bol = !!(eflags & REG_NOTBOL);
+ private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+ /* The user has told us exactly how many registers to return
+ information about, via `nmatch'. We have to pass that on to the
+ matching routines. */
+ private_preg.regs_allocated = REGS_FIXED;
+
+ if (want_reg_info)
+ {
+ regs.num_regs = nmatch;
+ regs.start = TALLOC (nmatch * 2, regoff_t);
+ if (regs.start == NULL)
+ return (int) REG_NOMATCH;
+ regs.end = regs.start + nmatch;
+ }
+
+ /* Perform the searching operation. */
+ ret = re_search (&private_preg, string, len,
+ /* start: */ 0, /* range: */ len,
+ want_reg_info ? &regs : (struct re_registers *) 0);
+
+ /* Copy the register information to the POSIX structure. */
+ if (want_reg_info)
+ {
+ if (ret >= 0)
+ {
+ unsigned r;
+
+ for (r = 0; r < nmatch; r++)
+ {
+ pmatch[r].rm_so = regs.start[r];
+ pmatch[r].rm_eo = regs.end[r];
+ }
+ }
+
+ /* If we needed the temporary register info, free the space now. */
+ free (regs.start);
+ }
+
+ /* We want zero return to mean success, unlike `re_search'. */
+ return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+ int errcode;
+ const regex_t *preg;
+ char *errbuf;
+ size_t errbuf_size;
+{
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (int) (sizeof (re_error_msgid_idx)
+ / sizeof (re_error_msgid_idx[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
+
+ if (errbuf_size != 0)
+ {
+ if (msg_size > errbuf_size)
+ {
+#if defined HAVE_MEMPCPY || defined _LIBC
+ *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+ memcpy (errbuf, msg, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = 0;
+#endif
+ }
+ else
+ memcpy (errbuf, msg, msg_size);
+ }
+
+ return msg_size;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree (preg)
+ regex_t *preg;
+{
+ if (preg->buffer != NULL)
+ free (preg->buffer);
+ preg->buffer = NULL;
+
+ preg->allocated = 0;
+ preg->used = 0;
+
+ if (preg->fastmap != NULL)
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ preg->fastmap_accurate = 0;
+
+ if (preg->translate != NULL)
+ free (preg->translate);
+ preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+#endif /* not emacs */
diff --git a/src/gnuregex.h b/src/gnuregex.h
new file mode 100644
index 0000000..d88ab92
--- /dev/null
+++ b/src/gnuregex.h
@@ -0,0 +1,542 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+ Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+# include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+# define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+# define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS and BUFFER->no_sub are nonzero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern,
+ int __cflags));
+
+extern int regexec _RE_ARGS ((const regex_t *__preg,
+ const char *__string, size_t __nmatch,
+ regmatch_t __pmatch[], int __eflags));
+
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+ char *__errbuf, size_t __errbuf_size));
+
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..ca64f8e
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,90 @@
+/* $Id: log.c,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * Logs the various messages which tinyproxy produces to either a log file or
+ * the syslog daemon. Not much to it...
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * log.c - For the manipulation of log files.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <time.h>
+#include <syslog.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "tinyproxy.h"
+#include "log.h"
+
+#define LENGTH 16
+
+/*
+ * This routine logs messages to either the log file or the syslog function.
+ */
+void log(char *fmt, ...)
+{
+ va_list args;
+ time_t nowtime;
+ FILE *cf;
+
+#if defined(HAVE_SYSLOG_H) && !defined(HAVE_VSYSLOG_H)
+ static char str[800];
+#endif
+ static char time_string[LENGTH];
+
+ assert(fmt);
+
+ va_start(args, fmt);
+
+#ifdef HAVE_SYSLOG_H
+ if (config.syslog == FALSE) {
+#endif
+ nowtime = time(NULL);
+ /* Format is month day hour:minute:second (24 time) */
+ strftime(time_string, LENGTH, "%b %d %H:%M:%S",
+ localtime(&nowtime));
+
+ if (!(cf = config.logf))
+ cf = stderr;
+
+ fprintf(cf, "%s [%d]: ", time_string, getpid());
+ vfprintf(cf, fmt, args);
+ fprintf(cf, "\n");
+ fflush(cf);
+#ifdef HAVE_SYSLOG_H
+ } else {
+# ifdef HAVE_VSYSLOG_H
+ vsyslog(LOG_INFO, fmt, args);
+# else
+# ifdef HAVE_VSNPRINTF
+ vsnprintf(str, 800, fmt, args);
+# else
+# ifdef HAVE_VPRINTF
+ vsprintf(str, fmt, args);
+# endif
+# endif
+ syslog(LOG_INFO, str);
+# endif
+ }
+#endif
+
+ va_end(args);
+}
diff --git a/src/log.h b/src/log.h
new file mode 100644
index 0000000..e928b4c
--- /dev/null
+++ b/src/log.h
@@ -0,0 +1,24 @@
+/* $Id: log.h,v 1.1.1.1 2000-02-16 17:32:22 sdyoung Exp $
+ *
+ * See 'log.c' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _LOG_H_
+#define _LOG_H_ 1
+
+extern void log(char *fmt, ...);
+
+#endif
diff --git a/src/regexp.h b/src/regexp.h
new file mode 100644
index 0000000..1c40332
--- /dev/null
+++ b/src/regexp.h
@@ -0,0 +1,24 @@
+/* $Id: regexp.h,v 1.1.1.1 2000-02-16 17:32:24 sdyoung Exp $
+ *
+ * We need this little header to help distinguish whether to use the REGEX
+ * library installed in the system, or to include our own version (the GNU
+ * version to be exact.)
+ *
+ * Copyright (C) 2000 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef USE_GNU_REGEX
+# include "gnuregex.h"
+#else
+# include <regex.h>
+#endif
diff --git a/src/reqs.c b/src/reqs.c
new file mode 100644
index 0000000..7d6aa9f
--- /dev/null
+++ b/src/reqs.c
@@ -0,0 +1,832 @@
+/* $Id: reqs.c,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * This is where all the work in tinyproxy is actually done. Incoming
+ * connections are added to the active list of connections and then the header
+ * is processed to determine what is being requested. tinyproxy then connects
+ * to the remote server and begins to relay the bytes back and forth between
+ * the client and the remote server. Basically, we sit there and sling bytes
+ * back and forth. Optionally, we can weed out headers we do not want to send,
+ * and also add a header of our own.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ * Copyright (C) 2000 Chris Lightfoot (chris@ex-parrot.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sysexits.h>
+#include <assert.h>
+
+#include "config.h"
+#include "tinyproxy.h"
+#include "sock.h"
+#include "utils.h"
+#include "conns.h"
+#include "log.h"
+#include "reqs.h"
+#include "buffer.h"
+#include "filter.h"
+#include "uri.h"
+#include "regexp.h"
+
+/* chris - for asynchronous DNS */
+#include "dnscache.h"
+#include <adns.h>
+extern adns_state adns;
+
+#ifdef XTINYPROXY
+
+static void Add_XTinyproxy_Header(struct conn_s *connptr)
+{
+ char *header_line;
+ char ipaddr[PEER_IP_LENGTH];
+ int length;
+
+ assert(connptr);
+
+ if (!(header_line = xmalloc(sizeof(char) * 32)))
+ return;
+
+ length = sprintf(header_line, "X-Tinyproxy: %s\r\n",
+ getpeer_ip(connptr->client_fd, ipaddr));
+
+ unshift_buffer(connptr->cbuffer, header_line, length);
+}
+
+#endif
+
+#define HTTPPATTERN "^([a-z]+)[ \t]+([^ \t]+)([ \t]+(HTTP/[0-9]+\\.[0-9]+))?"
+#define NMATCH 4
+#define METHOD_IND 1
+#define URI_IND 2
+#define VERSION_MARK 3
+#define VERSION_IND 4
+
+#define HTTP400ERROR "Unrecognizable request. Only HTTP is allowed."
+#define HTTP500ERROR "Unable to connect to remote server."
+#define HTTP503ERROR "Internal server error."
+
+/*
+ * Parse a client HTTP request and then establish connection.
+ */
+static int clientreq(struct conn_s *connptr)
+{
+ URI *uri = NULL;
+ char *inbuf, *buffer, *request, *port;
+ char *inbuf_ptr;
+
+ regex_t preg;
+ regmatch_t pmatch[NMATCH];
+
+ long len;
+ int fd, port_no;
+
+ char peer_ipaddr[PEER_IP_LENGTH];
+
+ assert(connptr);
+
+ getpeer_ip(connptr->client_fd, peer_ipaddr);
+ /* chris - getpeer_string could block, so for the moment take it out */
+
+ if ((len
+ = readline(connptr->client_fd, connptr->cbuffer, &inbuf)) <= 0) {
+ return len;
+ }
+
+ inbuf_ptr = inbuf + len - 1;
+ while (*inbuf_ptr == '\r' || *inbuf_ptr == '\n')
+ *inbuf_ptr-- = '\0';
+
+ /* Log the incoming connection */
+ if (!config.restricted) {
+ log("Connect: %s", peer_ipaddr);
+ log("Request: %s", inbuf);
+ }
+
+ if (regcomp(&preg, HTTPPATTERN, REG_EXTENDED | REG_ICASE) != 0) {
+ log("ERROR clientreq: regcomp");
+ return 0;
+ }
+ if (regexec(&preg, inbuf, NMATCH, pmatch, 0) != 0) {
+ log("ERROR clientreq: regexec");
+ regfree(&preg);
+ return -1;
+ }
+ regfree(&preg);
+
+ if (pmatch[VERSION_MARK].rm_so == -1)
+ connptr->simple_req = TRUE;
+
+ if (pmatch[METHOD_IND].rm_so == -1 || pmatch[URI_IND].rm_so == -1) {
+ log("ERROR clientreq: Incomplete line from %s (%s)",
+ peer_ipaddr, inbuf);
+ httperr(connptr, 400, HTTP400ERROR);
+ goto COMMON_EXIT;
+ }
+
+ len = pmatch[URI_IND].rm_eo - pmatch[URI_IND].rm_so;
+ if (!(buffer = xmalloc(len + 1))) {
+ log("ERROR clientreq: Cannot allocate buffer for request from %s",
+ peer_ipaddr);
+ httperr(connptr, 503, HTTP503ERROR);
+ goto COMMON_EXIT;
+ }
+ memcpy(buffer, inbuf + pmatch[URI_IND].rm_so, len);
+ buffer[len] = '\0';
+ if (!(uri = explode_uri(buffer))) {
+ safefree(buffer);
+ log("ERROR clientreq: Problem with explode_uri");
+ httperr(connptr, 503, HTTP503ERROR);
+ goto COMMON_EXIT;
+ }
+ safefree(buffer);
+
+ if (strcasecmp(uri->scheme, "http") != 0) {
+ char *error_string = xmalloc(strlen(uri->scheme) + 64);
+ sprintf(error_string, "Invalid scheme (%s). Only HTTP is allowed.",
+ uri->scheme);
+ httperr(connptr, 400, error_string);
+ safefree(error_string);
+ goto COMMON_EXIT;
+ }
+
+ if ((strlen(config.stathost) > 0) &&
+ strcasecmp(uri->authority, config.stathost) == 0) {
+ showstats(connptr);
+ goto COMMON_EXIT;
+ }
+
+ port_no = 80;
+ if ((port = strchr(uri->authority, ':'))) {
+ *port++ = '\0';
+ if (strlen(port) > 0)
+ port_no = atoi(port);
+ }
+
+ /* chris - so this can be passed on to clientreq_dnscomplete. */
+ connptr->port_no = port_no;
+
+#ifdef FILTER_ENABLE
+ /* Filter domains out */
+ if (config.filter) {
+ if (filter_host(uri->authority)) {
+ log("ERROR clientreq: Filtered connection (%s)",
+ peer_ipaddr);
+ httperr(connptr, 404,
+ "Unable to connect to filtered host.");
+ goto COMMON_EXIT;
+ }
+ }
+#endif /* FILTER_ENABLE */
+
+ /* Build a new request from the first line of the header */
+ if (!(request = xmalloc(strlen(inbuf) + 1))) {
+ log("ERROR clientreq: cannot allocate buffer for request from %s",
+ peer_ipaddr);
+ httperr(connptr, 503, HTTP503ERROR);
+ goto COMMON_EXIT;
+ }
+
+ /* We need to set the version number WE support */
+ memcpy(request, inbuf, pmatch[METHOD_IND].rm_eo);
+ request[pmatch[METHOD_IND].rm_eo] = '\0';
+ strcat(request, " ");
+ strcat(request, uri->path);
+ if (uri->query) {
+ strcat(request, "?");
+ strcat(request, uri->query);
+ }
+ strcat(request, " HTTP/1.0\r\n");
+
+ /* chris - If domain is in dotted-quad format or is already in the
+ * DNS cache, then go straight to WAITCONN.
+ */
+ if (inet_aton(uri->authority, NULL) || lookup(NULL, uri->authority) == 0) {
+ if ((fd = opensock(uri->authority, port_no)) < 0) {
+ safefree(request);
+ httperr(connptr, 500,
+ "Unable to connect to host (cannot create sock)");
+ stats.num_badcons++;
+ goto COMMON_EXIT;
+ }
+
+ connptr->server_fd = fd;
+ connptr->type = WAITCONN;
+ }
+ /* Otherwise submit a DNS request and hope for the best. */
+ else {
+ if (adns_submit(adns, uri->authority, adns_r_a, adns_qf_quoteok_cname | adns_qf_cname_loose, connptr, &(connptr->adns_qu))) {
+ safefree(request);
+ httperr(connptr, 500, "Resolver error connecting to host");
+ stats.num_badcons++;
+ goto COMMON_EXIT;
+ } else {
+#ifdef __DEBUG__
+ log("DNS request submitted");
+#endif
+ /* copy domain for later caching */
+ connptr->domain = strdup(uri->authority);
+ connptr->type = DNS_WAITCONN;
+ }
+ }
+
+#ifdef XTINYPROXY
+ /* Add a X-Tinyproxy header which contains our IP address */
+ if (config.my_domain
+ && xstrstr(uri->authority, config.my_domain,
+ strlen(uri->authority), FALSE)) {
+ Add_XTinyproxy_Header(connptr);
+ }
+#endif
+
+ /* Add the rewritten request to the buffer */
+ unshift_buffer(connptr->cbuffer, request, strlen(request));
+
+ /*
+ * HACK HACK HACK: When we're sending a POST there is no restriction
+ * on the length of the header. If we don't let all the header lines
+ * through, the POST will not work. This _definitely_ needs to be
+ * fixed. - rjkaes
+ */
+ if (!xstrstr(inbuf, "POST ", 5, FALSE)) {
+ connptr->clientheader = TRUE;
+ }
+
+ COMMON_EXIT:
+ safefree(inbuf);
+ free_uri(uri);
+ return 0;
+}
+
+/* chris - added this to move a connection from the DNS_WAITCONN state
+ * to the WAITCONN state by connecting it to the server, once the name
+ * has been resolved.
+ */
+static int clientreq_dnscomplete(struct conn_s *connptr, struct in_addr *inaddr) {
+ int fd;
+
+ fd = opensock_inaddr(inaddr, connptr->port_no);
+
+ if (fd < 0) {
+#ifdef __DEBUG__
+ log("Failed to open connection to server");
+#endif
+ httperr(connptr, 500,
+ "Unable to connect to host (cannot create sock)");
+ stats.num_badcons++;
+
+ return 0;
+ } else {
+#ifdef __DEBUG__
+ log("Connected to server");
+#endif
+ connptr->server_fd = fd;
+ connptr->type = WAITCONN;
+ }
+
+ return 0;
+}
+
+/*
+ * Finish the client request
+ */
+static int clientreq_finish(struct conn_s *connptr)
+{
+ int sockopt, len = sizeof(sockopt);
+
+ assert(connptr);
+
+ if (getsockopt
+ (connptr->server_fd, SOL_SOCKET, SO_ERROR, &sockopt, &len) < 0) {
+ log("ERROR clientreq_finish: getsockopt error (%s)",
+ strerror(errno));
+ return -1;
+ }
+
+ if (sockopt != 0) {
+ if (sockopt == EINPROGRESS)
+ return 0;
+ else if (sockopt == ETIMEDOUT) {
+ httperr(connptr, 408, "Connect Timed Out");
+ return 0;
+ } else {
+ log("ERROR clientreq_finish: could not create connection (%s)",
+ strerror(sockopt));
+ return -1;
+ }
+ }
+
+ connptr->type = RELAYCONN;
+ stats.num_opens++;
+ return 0;
+}
+
+/*
+ * Check to see if the line is allowed or not depending on the anonymous
+ * headers which are to be allowed.
+ */
+static int anonheader(char *line)
+{
+ struct allowedhdr_s *allowedptr = allowedhdrs;
+
+ assert(line);
+ assert(allowedhdrs);
+
+ if (!xstrstr(line, "GET ", 4, FALSE)
+ || !xstrstr(line, "POST ", 5, FALSE)
+ || !xstrstr(line, "HEAD ", 5, FALSE))
+ return 1;
+
+ for (allowedptr = allowedhdrs; allowedptr;
+ allowedptr = allowedptr->next) {
+ if (!strncasecmp
+ (line, allowedptr->hdrname, strlen(allowedptr->hdrname))) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Used to read in the lines from the header (client side) when we're doing
+ * the anonymous header reduction.
+ */
+static int readanonconn(struct conn_s *connptr)
+{
+ char *line = NULL;
+ int retv;
+
+ assert(connptr);
+
+ if ((retv = readline(connptr->client_fd, connptr->cbuffer, &line)) <=
+ 0) {
+ return retv;
+ }
+
+ if ((line[0] == '\n') || (strncmp(line, "\r\n", 2) == 0)) {
+ connptr->clientheader = TRUE;
+ } else if (!anonheader(line)) {
+ safefree(line);
+ return 0;
+ }
+
+ push_buffer(connptr->cbuffer, line, strlen(line));
+ return 0;
+}
+
+/*
+ * Read in the bytes from the socket
+ */
+static int readconn(int fd, struct buffer_s *buffptr)
+{
+ int bytesin;
+
+ assert(fd >= 0);
+ assert(buffptr);
+
+ if ((bytesin = readbuff(fd, buffptr)) < 0) {
+ return -1;
+ }
+#ifdef __DEBUG__
+ log("readconn [%d]: %d", fd, bytesin);
+#endif
+
+ stats.num_rx += bytesin;
+ return bytesin;
+}
+
+/*
+ * Write the bytes from the buffer to the socket
+ */
+static int writeconn(int fd, struct buffer_s *buffptr)
+{
+ int bytessent;
+
+ assert(fd >= 0);
+ assert(buffptr);
+
+ if ((bytessent = writebuff(fd, buffptr)) < 0) {
+ return -1;
+ }
+
+ stats.num_tx += bytessent;
+ return bytessent;
+}
+
+/*
+ * Factored out the common function to read from the client. It was used in
+ * two different places with no change, so no point in having the same code
+ * twice.
+ */
+static int read_from_client(struct conn_s *connptr, fd_set * readfds)
+{
+ assert(connptr);
+ assert(readfds);
+
+ if (FD_ISSET(connptr->client_fd, readfds)) {
+ if (config.anonymous && !connptr->clientheader) {
+ if (readanonconn(connptr) < 0) {
+ shutdown(connptr->client_fd, 2);
+ shutdown(connptr->server_fd, 2);
+ connptr->type = FINISHCONN;
+ return -1;
+ }
+ } else if (readconn(connptr->client_fd, connptr->cbuffer) < 0) {
+ shutdown(connptr->client_fd, 2);
+ shutdown(connptr->server_fd, 2);
+ connptr->type = FINISHCONN;
+ return -1;
+ }
+ connptr->actiontime = time(NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * Factored out the common write to client function since, again, it was used
+ * in two different places with no changes.
+ */
+static int write_to_client(struct conn_s *connptr, fd_set * writefds)
+{
+ assert(connptr);
+ assert(writefds);
+
+ if (FD_ISSET(connptr->client_fd, writefds)) {
+ if (writeconn(connptr->client_fd, connptr->sbuffer) < 0) {
+ shutdown(connptr->client_fd, 2);
+ shutdown(connptr->server_fd, 2);
+ connptr->type = FINISHCONN;
+ return -1;
+ }
+ connptr->actiontime = time(NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * All of the *_req functions handle the various stages a connection can go
+ * through. I moved them out from getreqs because they handle a lot of error
+ * code and it was indenting too far in. As you can see they are very simple,
+ * and are only called once from getreqs, hence the inlining.
+ */
+
+inline static void newconn_req(struct conn_s *connptr, fd_set * readfds)
+{
+ assert(connptr);
+ assert(readfds);
+
+ if (FD_ISSET(connptr->client_fd, readfds)) {
+ if (clientreq(connptr) < 0) {
+ shutdown(connptr->client_fd, 2);
+ connptr->type = FINISHCONN;
+ return;
+ }
+
+ if (!connptr)
+ abort();
+
+ connptr->actiontime = time(NULL);
+ }
+}
+
+inline static void waitconn_req(struct conn_s *connptr, fd_set * readfds,
+ fd_set * writefds)
+{
+ assert(connptr);
+ assert(readfds);
+ assert(writefds);
+
+ if (read_from_client(connptr, readfds) < 0)
+ return;
+
+ if (FD_ISSET(connptr->server_fd, readfds)
+ || FD_ISSET(connptr->server_fd, writefds)) {
+ if (clientreq_finish(connptr) < 0) {
+ shutdown(connptr->server_fd, 2);
+ shutdown(connptr->client_fd, 2);
+ connptr->type = FINISHCONN;
+ return;
+ }
+ connptr->actiontime = time(NULL);
+ }
+}
+
+inline static void relayconn_req(struct conn_s *connptr, fd_set * readfds,
+ fd_set * writefds)
+{
+ assert(connptr);
+ assert(readfds);
+ assert(writefds);
+
+ if (read_from_client(connptr, readfds) < 0)
+ return;
+
+ if (FD_ISSET(connptr->server_fd, readfds)) {
+ if (connptr->serverheader) {
+ if (readconn(connptr->server_fd, connptr->sbuffer) < 0) {
+ shutdown(connptr->server_fd, 2);
+ connptr->type = CLOSINGCONN;
+ return;
+ }
+ } else {
+ /*
+ * We need to read in the first line to rewrite the
+ * version back down to HTTP/1.0 (if needed)
+ */
+ char *line = NULL, *ptr, *newline;
+ int retv;
+
+ if (
+ (retv =
+ readline(connptr->server_fd, connptr->sbuffer,
+ &line)) < 0) {
+ shutdown(connptr->server_fd, 2);
+ httperr(connptr, 500, "Server Closed Early");
+ return;
+ } else if (retv == 0)
+ return;
+
+ connptr->serverheader = TRUE;
+
+ if (strncasecmp(line, "HTTP/1.0", 8)) {
+ /* Okay, we need to rewrite it then */
+ if (!(ptr = strchr(line, ' '))) {
+ shutdown(connptr->server_fd, 2);
+ httperr(connptr, 500,
+ "There was Server Error");
+ return;
+ }
+ ptr++;
+
+ if (!(newline = xmalloc(strlen(line) + 1))) {
+ shutdown(connptr->server_fd, 2);
+ httperr(connptr, 503,
+ "No Memory Available");
+ return;
+ }
+
+ sprintf(newline, "HTTP/1.0 %s", ptr);
+ safefree(line);
+ line = newline;
+ }
+
+ push_buffer(connptr->sbuffer, line, strlen(line));
+ }
+ connptr->actiontime = time(NULL);
+ }
+
+ if (write_to_client(connptr, writefds) < 0)
+ return;
+
+ if (FD_ISSET(connptr->server_fd, writefds)) {
+ if (writeconn(connptr->server_fd, connptr->cbuffer) < 0) {
+ shutdown(connptr->server_fd, 2);
+ connptr->type = CLOSINGCONN;
+ return;
+ }
+ connptr->actiontime = time(NULL);
+ }
+}
+
+inline static void closingconn_req(struct conn_s *connptr, fd_set * writefds)
+{
+ assert(connptr);
+ assert(writefds);
+
+ write_to_client(connptr, writefds);
+}
+
+/*
+ * Check against the valid subnet to see if we should allow the access
+ */
+static int validuser(int fd)
+{
+ char ipaddr[PEER_IP_LENGTH];
+
+ assert(fd >= 0);
+
+ if (config.subnet == NULL)
+ return 1;
+
+ if (!strncmp(config.subnet, getpeer_ip(fd, ipaddr),
+ strlen(config.subnet))) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/*
+ * Loop that checks for new connections, dispatches to the correct
+ * routines if bytes are pending, checks to see if it's time for a
+ * garbage collect.
+ */
+int getreqs(void)
+{
+ static unsigned int garbc = 0;
+ fd_set readfds, writefds, exceptfds; /* chris - ADNS expects exceptfds */
+ struct conn_s *connptr;
+ int fd;
+ struct timeval tv, now; /* chris - for ADNS timeouts */
+
+ char peer_ipaddr[PEER_IP_LENGTH];
+
+ if (setup_fd < 0) {
+ log("ERROR getreqs: setup_fd not a socket");
+ return -1;
+ }
+
+ /* Garbage collect the dead connections and close any idle ones */
+ if (garbc++ >= GARBCOLL_INTERVAL) {
+ garbcoll();
+ garbc = 0;
+ }
+ conncoll();
+
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_SET(setup_fd, &readfds);
+
+ for (connptr = connections; connptr; connptr = connptr->next) {
+#ifdef __DEBUG__
+ log("Connptr: %p - %d / client %d server %d", connptr,
+ connptr->type, connptr->client_fd, connptr->server_fd);
+#endif
+ switch (connptr->type) {
+ case NEWCONN:
+ if (buffer_size(connptr->cbuffer) < MAXBUFFSIZE)
+ FD_SET(connptr->client_fd, &readfds);
+ else {
+ httperr(connptr, 414,
+ "Your Request is way too long.");
+ }
+ break;
+
+ /* no case here for DNS_WAITCONN */
+
+ case WAITCONN:
+ FD_SET(connptr->server_fd, &readfds);
+ FD_SET(connptr->server_fd, &writefds);
+
+ if (buffer_size(connptr->cbuffer) < MAXBUFFSIZE)
+ FD_SET(connptr->client_fd, &readfds);
+ break;
+
+ case RELAYCONN:
+ if (buffer_size(connptr->sbuffer) > 0)
+ FD_SET(connptr->client_fd, &writefds);
+ if (buffer_size(connptr->sbuffer) < MAXBUFFSIZE)
+ FD_SET(connptr->server_fd, &readfds);
+
+ if (buffer_size(connptr->cbuffer) > 0)
+ FD_SET(connptr->server_fd, &writefds);
+ if (buffer_size(connptr->cbuffer) < MAXBUFFSIZE)
+ FD_SET(connptr->client_fd, &readfds);
+
+ break;
+
+ case CLOSINGCONN:
+ if (buffer_size(connptr->sbuffer) > 0)
+ FD_SET(connptr->client_fd, &writefds);
+ else {
+ shutdown(connptr->client_fd, 2);
+ shutdown(connptr->server_fd, 2);
+ connptr->type = FINISHCONN;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Set a 60 second time out */
+ tv.tv_sec = 1;//60;
+ tv.tv_usec = 0;
+
+ /* chris - Make ADNS do its stuff, too. */
+ {
+ struct timeval *tv_mod = &tv;
+ int foo = FD_SETSIZE;
+ gettimeofday(&now, NULL);
+ FD_ZERO(&exceptfds);
+ adns_beforeselect(adns, &foo, &readfds, &writefds, &exceptfds, &tv_mod, NULL, &now);
+ }
+
+ if (select(FD_SETSIZE, &readfds, &writefds, &exceptfds, &tv) < 0) {
+#ifdef __DEBUG__
+ log("select error: %s", strerror(errno));
+#endif
+ return 0;
+ }
+
+ /* chris - see whether any ADNS lookups have completed */
+ gettimeofday(&now, NULL);
+ adns_afterselect(adns, FD_SETSIZE, &readfds, &writefds, &exceptfds, &now);
+
+ for (connptr = connections; connptr; connptr = connptr->next) {
+ adns_answer *ans;
+
+ if (connptr->type == DNS_WAITCONN &&
+ adns_check(adns, &(connptr->adns_qu), &ans, (void**)&connptr) == 0) {
+
+ if (ans->status == adns_s_ok) {
+ if (connptr->domain) {
+ insert(ans->rrs.inaddr, connptr->domain);
+ free(connptr->domain);
+ }
+
+ clientreq_dnscomplete(connptr, ans->rrs.inaddr);
+ free(ans);
+
+ /* hack */
+ FD_SET(connptr->server_fd, &readfds);
+#ifdef __DEBUG__
+ log("DNS resolution successful");
+#endif
+ } else {
+ free(ans);
+
+ httperr(connptr, 500, "Unable to resolve hostname in URL");
+#ifdef __DEBUG__
+ log("DNS resolution failed");
+#endif
+ }
+ }
+ }
+
+ /* Check to see if there are new connections pending */
+ if (FD_ISSET(setup_fd, &readfds) && (fd = listen_sock()) >= 0) {
+ new_conn(fd); /* make a connection from the FD */
+
+ if (validuser(fd)) {
+ if (config.cutoffload && (load > config.cutoffload)) {
+ stats.num_refused++;
+ httperr(connptr, 503,
+ "tinyproxy is not accepting connections due to high system load");
+ }
+ } else {
+ httperr(connptr, 403,
+ "You are not authorized to use the service.");
+ log("AUTH Rejected connection from %s",
+ getpeer_ip(fd, peer_ipaddr));
+ }
+ }
+
+ /*
+ * Loop through the connections and dispatch them to the appropriate
+ * handler
+ */
+ for (connptr = connections; connptr; connptr = connptr->next) {
+ switch (connptr->type) {
+ case NEWCONN:
+ newconn_req(connptr, &readfds);
+ break;
+
+ case WAITCONN:
+ waitconn_req(connptr, &readfds, &writefds);
+ break;
+
+ case RELAYCONN:
+ relayconn_req(connptr, &readfds, &writefds);
+ break;
+
+ case CLOSINGCONN:
+ closingconn_req(connptr, &writefds);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/reqs.h b/src/reqs.h
new file mode 100644
index 0000000..182fc20
--- /dev/null
+++ b/src/reqs.h
@@ -0,0 +1,24 @@
+/* $Id: reqs.h,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * See 'reqs.c' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _REQS_H_
+#define _REQS_H_ 1
+
+extern int getreqs(void);
+
+#endif
diff --git a/src/sock.c b/src/sock.c
new file mode 100644
index 0000000..dc415e5
--- /dev/null
+++ b/src/sock.c
@@ -0,0 +1,362 @@
+/* $Id: sock.c,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * Sockets are created and destroyed here. When a new connection comes in from
+ * a client, we need to copy the socket and the create a second socket to the
+ * remote server the client is trying to connect to. Also, the listening
+ * socket is created and destroyed here. Sounds more impressive than it
+ * actually is.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ * Copyright (C) 2000 Chris Lightfoot (chris@ex-parrot.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <netdb.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <assert.h>
+
+#include "tinyproxy.h"
+#include "sock.h"
+#include "log.h"
+#include "utils.h"
+#include "dnscache.h"
+
+/* This routine is so old I can't even remember writing it. But I do
+ * remember that it was an .h file because I didn't know putting code in a
+ * header was bad magic yet. anyway, this routine opens a connection to a
+ * system and returns the fd.
+ */
+
+/*
+ * Cleaned up some of the code to use memory routines which are now the
+ * default. Also, the routine first checks to see if the address is in
+ * dotted-decimal form before it does a name lookup. Finally, the newly
+ * created socket is made non-blocking.
+ * - rjkaes
+ */
+int opensock(char *ip_addr, int port)
+{
+ int sock_fd, flags;
+ struct sockaddr_in port_info;
+
+ assert(ip_addr);
+ assert(port > 0);
+
+ memset((struct sockaddr *) &port_info, 0, sizeof(port_info));
+
+ port_info.sin_family = AF_INET;
+
+ /* chris - Could block; neet to ensure that this is never called
+ * before a non-blocking DNS query happens for this address. Not
+ * relevant in the code as it stands.
+ */
+ if (dnscache(&port_info.sin_addr, ip_addr) < 0) {
+ log("ERROR opensock: Could not lookup address: %s", ip_addr);
+ return -1;
+ }
+
+ port_info.sin_port = htons(port);
+
+ if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
+ log("ERROR opensock: socket (%s)", strerror(errno));
+ return -1;
+ }
+
+ flags = fcntl(sock_fd, F_GETFL, 0);
+ fcntl(sock_fd, F_SETFL, flags | O_NONBLOCK);
+
+ if (connect
+ (sock_fd, (struct sockaddr *) &port_info, sizeof(port_info)) < 0) {
+ if (errno != EINPROGRESS) {
+ log("ERROR opensock: connect (%s)", strerror(errno));
+ return -1;
+ }
+ }
+
+ return sock_fd;
+}
+
+/* chris - added this to open a socket given a struct in_addr */
+int opensock_inaddr(struct in_addr *inaddr, int port)
+{
+ int sock_fd, flags;
+ struct sockaddr_in port_info;
+
+ assert(inaddr);
+ assert(port > 0);
+
+ memset((struct sockaddr *) &port_info, 0, sizeof(port_info));
+
+ port_info.sin_family = AF_INET;
+
+ memcpy(&port_info.sin_addr, inaddr, sizeof(struct in_addr));
+
+ port_info.sin_port = htons(port);
+
+ if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
+ log("ERROR opensock_inaddr: socket (%s)", strerror(errno));
+ return -1;
+ }
+
+ flags = fcntl(sock_fd, F_GETFL, 0);
+ fcntl(sock_fd, F_SETFL, flags | O_NONBLOCK);
+
+ if (connect
+ (sock_fd, (struct sockaddr *) &port_info, sizeof(port_info)) < 0) {
+ if (errno != EINPROGRESS) {
+ log("ERROR opensock: connect (%s)", strerror(errno));
+ return -1;
+ }
+ }
+
+ return sock_fd;
+}
+
+int setup_fd;
+static struct sockaddr listen_sock_addr;
+
+/*
+ * Start listening to a socket.
+ */
+int init_listen_sock(int port)
+{
+ struct sockaddr_in laddr;
+ int i = 1;
+
+ assert(port > 0);
+
+ if ((setup_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ log("ERROR init_listen_sock: socket (%s)", strerror(errno));
+ return -1;
+ }
+
+ if (setsockopt(setup_fd, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i)) < 0) {
+ log("ERROR init_listen_sock: setsockopt (%s)",
+ strerror(errno));
+ return -1;
+ }
+
+ memset(&listen_sock_addr, 0, sizeof(listen_sock_addr));
+ memset(&laddr, 0, sizeof(laddr));
+ laddr.sin_family = AF_INET;
+ laddr.sin_port = htons(port);
+
+ if (config.ipAddr) {
+ laddr.sin_addr.s_addr = inet_addr(config.ipAddr);
+ } else {
+ laddr.sin_addr.s_addr = inet_addr("0.0.0.0");
+ }
+ if (bind(setup_fd, (struct sockaddr *) &laddr, sizeof(laddr)) < 0) {
+ log("ERROR init_listen_sock: bind (%s)", strerror(errno));
+ return -1;
+ }
+ if ((listen(setup_fd, MAXLISTEN)) != 0) {
+ log("ERROR init_listen_sock: listen (%s)", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Grab a pending connection
+ */
+int listen_sock(void)
+{
+ static int sock;
+ int sz = sizeof(listen_sock_addr);
+
+ if ((sock = accept(setup_fd, &listen_sock_addr, &sz)) < 0) {
+ if (errno != ECONNABORTED
+#ifdef EPROTO
+ && errno != EPROTO
+#endif
+#ifdef EWOULDBLOCK
+ && errno != EWOULDBLOCK
+#endif
+ && errno != EINTR)
+ log("ERROR listen_sock: accept (%s)", strerror(errno));
+ return -1;
+ }
+ stats.num_listens++;
+
+ return sock;
+}
+
+/*
+ * Stop listening on a socket.
+ */
+void de_init_listen_sock(void)
+{
+ close(setup_fd);
+}
+
+/*
+ * Takes a socket descriptor and returns the string contain the peer's
+ * IP address.
+ */
+char *getpeer_ip(int fd, char *ipaddr)
+{
+ struct sockaddr_in name;
+ int namelen = sizeof(name);
+
+ assert(fd >= 0);
+ assert(ipaddr);
+
+ memset(ipaddr, '\0', PEER_IP_LENGTH);
+
+ if (getpeername(fd, (struct sockaddr *) &name, &namelen) != 0) {
+ log("ERROR Connect: 'could not get peer name'");
+ } else {
+ strncpy(ipaddr,
+ inet_ntoa(*(struct in_addr *) &name.sin_addr.s_addr),
+ PEER_IP_LENGTH - 1);
+ }
+
+ return ipaddr;
+}
+
+/*
+ * Takes a socket descriptor and returns the string containing the peer's
+ * address.
+ */
+char *getpeer_string(int fd, char *string)
+{
+ struct sockaddr_in name;
+ int namelen = sizeof(name);
+ struct hostent *peername;
+
+ assert(fd >= 0);
+ assert(string);
+
+ memset(string, '\0', PEER_STRING_LENGTH);
+
+ if (getpeername(fd, (struct sockaddr *) &name, &namelen) != 0) {
+ log("ERROR Connect: 'could not get peer name'");
+ } else {
+ if (
+ (peername =
+ gethostbyaddr((char *) &name.sin_addr.s_addr,
+ sizeof(name.sin_addr.s_addr),
+ AF_INET)) != NULL) {
+ strncpy(string, peername->h_name,
+ PEER_STRING_LENGTH - 1);
+ }
+ }
+
+ return string;
+}
+
+
+/*
+ * Read one line of the header
+ */
+int readline(int fd, struct buffer_s *buffer, char **line)
+{
+ char inbuf[BUFFER];
+ int bytesin;
+ char *endline = NULL;
+ char *newline;
+ struct bufline_s *oldline;
+ unsigned long len = 0, length;
+
+ assert(fd >= 0);
+ assert(buffer);
+ assert(line);
+
+ *line = NULL;
+
+ if ((bytesin = recv(fd, inbuf, BUFFER - 1, MSG_PEEK)) <= 0) {
+ goto CONN_ERROR;
+ }
+
+ endline = xstrstr(inbuf, "\n", bytesin, FALSE);
+
+ if (endline) {
+ endline++;
+ *endline = '\0';
+
+ length = strlen(inbuf);
+ memset(inbuf, '\0', BUFFER);
+
+ /* Actually pull it off the queue */
+ if ((bytesin = recv(fd, inbuf, length, 0)) <= 0) {
+ goto CONN_ERROR;
+ }
+
+ *line = xstrdup(inbuf);
+
+ if (buffer_size(buffer) > 0) {
+ if (!(newline = xmalloc(buffer_size(buffer)
+ + length + 1))) {
+ return -1;
+ }
+
+ newline[0] = '\0';
+
+ while ((oldline = shift_buffer(buffer))) {
+ memcpy(newline + len, oldline->string,
+ oldline->length);
+
+ len += oldline->length;
+ free_line(oldline);
+ }
+
+ memcpy(newline + len, *line, strlen(*line) + 1);
+ safefree(*line);
+
+ *line = newline;
+ }
+
+ return strlen(*line);
+ }
+
+ /*
+ * We didn't find a '\r\n', but we've filled the buffer.. pull it off
+ * and try again later...
+ */
+ if ((bytesin = recv(fd, inbuf, bytesin, 0)) <= 0) {
+ goto CONN_ERROR;
+ }
+
+ if (!(*line = xmalloc(bytesin + 1))) {
+ return -1;
+ }
+
+ memcpy(*line, inbuf, bytesin);
+ (*line)[bytesin] = '\0';
+
+ push_buffer(buffer, *line, strlen(*line));
+
+ return 0;
+
+ CONN_ERROR:
+ if (bytesin == 0 || (errno != EAGAIN && errno != EINTR)) {
+ return -1;
+ }
+ return 0;
+}
diff --git a/src/sock.h b/src/sock.h
new file mode 100644
index 0000000..952a673
--- /dev/null
+++ b/src/sock.h
@@ -0,0 +1,40 @@
+/* $Id: sock.h,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * See 'sock.c' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _SOCK_H_
+#define _SOCK_H_ 1
+
+#include "buffer.h"
+
+#define PEER_IP_LENGTH 16
+#define PEER_STRING_LENGTH 256
+
+extern int setup_fd;
+
+extern int opensock(char *ip_addr, int port);
+extern int opensock_inaddr(struct in_addr *inaddr, int port);
+extern int init_listen_sock(int port);
+extern int listen_sock(void);
+extern void de_init_listen_sock(void);
+extern int setsocketopt(int *sock_fd, int options, int flip);
+
+extern char *getpeer_ip(int fd, char *ipaddr);
+extern char *getpeer_string(int fd, char *string);
+extern int readline(int fd, struct buffer_s *buffer, char **line);
+
+#endif
diff --git a/src/stamp-h.in b/src/stamp-h.in
new file mode 100644
index 0000000..9788f70
--- /dev/null
+++ b/src/stamp-h.in
@@ -0,0 +1 @@
+timestamp
diff --git a/src/tinyproxy.c b/src/tinyproxy.c
new file mode 100644
index 0000000..1425492
--- /dev/null
+++ b/src/tinyproxy.c
@@ -0,0 +1,478 @@
+/* $Id: tinyproxy.c,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * The initialize routine. Basically sets up all the initial stuff (logfile,
+ * listening socket, config options, etc.) and then sits there and loops
+ * over the new connections until the daemon is closed. Also has additional
+ * functions to handle the "user friendly" aspects of a program (usage,
+ * stats, etc.) Like any good program, most of the work is actually done
+ * elsewhere.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ * Copyright (C) 2000 Chris Lightfoot (chris@ex-parrot.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <pwd.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+/* chris - need this for asynchronous DNS resolution */
+#include <adns.h>
+
+adns_state adns;
+
+#include "config.h"
+#include "tinyproxy.h"
+#include "utils.h"
+#include "log.h"
+#include "sock.h"
+#include "conns.h"
+#include "reqs.h"
+#include "buffer.h"
+#include "filter.h"
+
+void takesig(int sig);
+
+/*
+ * Global Structures
+ */
+struct config_s config = {
+ NULL, /* Log file handle */
+ DEFAULT_LOG, /* Logfile name */
+ FALSE, /* Use syslog instead? */
+ DEFAULT_CUTOFFLOAD, /* Cut off load */
+ DEFAULT_PORT, /* Listen on this port */
+ DEFAULT_STATHOST, /* URL of stats host */
+ FALSE, /* Quit? */
+ DEFAULT_USER, /* Name of user to change to */
+ FALSE, /* Run anonymous by default? */
+ NULL, /* String containing the subnet allowed */
+ NULL, /* IP address to listen on */
+#ifdef FILTER_ENABLE
+ NULL, /* Location of filter file */
+#endif /* FILTER_ENABLE */
+ FALSE, /* Restrict the log to only errors */
+#ifdef XTINYPROXY
+ NULL /* The name of this domain */
+#endif
+};
+
+struct stat_s stats;
+float load = 0.00;
+struct allowedhdr_s *allowedhdrs = NULL;
+
+/*
+ * Dump info to the logfile
+ */
+static void dumpdebug(void)
+{
+ struct conn_s *connptr = connections;
+ long clients = 0, waiting = 0, relaying = 0, closing = 0, finished = 0;
+
+ log("SIGUSR1 received, debug dump follows.");
+
+ while (connptr) {
+ switch (connptr->type) {
+ case NEWCONN:
+ clients++;
+ break;
+ case WAITCONN:
+ waiting++;
+ break;
+ case RELAYCONN:
+ relaying++;
+ break;
+ case CLOSINGCONN:
+ closing++;
+ break;
+ case FINISHCONN:
+ finished++;
+ break;
+ default:
+ break;
+ }
+ connptr = connptr->next;
+ }
+ log("clients: %d, waiting: %d, relaying: %d," \
+ "closing: %d, finished: %d",
+ clients, waiting, relaying, closing, finished);
+ log("total requests handled: %lu", stats.num_reqs);
+ log("total connections handled: %lu", stats.num_cons);
+ log("total sockets listened: %lu", stats.num_listens);
+ log("total sockets opened: %lu", stats.num_opens);
+ log("total bad opens: %lu", stats.num_badcons);
+ log("total bytes tx: %lu", stats.num_tx);
+ log("total bytes rx: %lu", stats.num_rx);
+ log("connections refused due to load: %lu", stats.num_refused);
+ log("garbage collections: %lu", stats.num_garbage);
+ log("idle connections killed: %lu", stats.num_idles);
+ log("end debug dump.");
+}
+
+/*
+ * Handle a signal
+ */
+void takesig(int sig)
+{
+ switch (sig) {
+ case SIGUSR1:
+ dumpdebug();
+ break;
+ case SIGHUP:
+ if (config.logf)
+ ftruncate(fileno(config.logf), 0);
+
+ log("SIGHUP received, cleaning up...");
+ conncoll();
+ garbcoll();
+
+#ifdef FILTER_ENABLE
+ if (config.filter) {
+ filter_destroy();
+ filter_init();
+ }
+ log("Re-reading filter file.");
+#endif /* FILTER_ENABLE */
+ log("Finished cleaning memory/connections.");
+ break;
+ case SIGTERM:
+#ifdef FILTER_ENABLE
+ if (config.filter)
+ filter_destroy();
+#endif /* FILTER_ENABLE */
+ config.quit = TRUE;
+ break;
+ case SIGALRM:
+ calcload();
+ alarm(LOAD_RECALCTIMER);
+ break;
+ }
+ if (sig != SIGTERM)
+ signal(sig, takesig);
+ signal(SIGPIPE, SIG_IGN);
+}
+
+/*
+ * Display usage to the user on stderr.
+ */
+static void usagedisp(void)
+{
+ printf("tinyproxy version " VERSION "\n");
+ printf("Copyright 1998 Steven Young (sdyoung@well.com)\n");
+ printf
+ ("Copyright 1998-1999 Robert James Kaes (rjkaes@flarenet.com)\n\n");
+ printf("Copyright 2000 Chris Lightfoot (chris@ex-parrot.com)\n");
+
+ printf
+ ("This software is licensed under the GNU General Public License (GPL).\n");
+ printf("See the file 'COPYING' included with tinyproxy source.\n\n");
+
+ printf("Compiled with Ian Jackson's adns:\n");
+ printf(" http://www.chiark.greenend.org.uk/~ian/adns/\n\n");
+
+ printf("Usage: tinyproxy [args]\n");
+ printf("Options:\n");
+ printf("\t-v\t\tdisplay version number\n");
+ printf("\t-h\t\tdisplay usage\n");
+ printf("\t-d\t\tdo not daemonize\n");
+ printf
+ ("\t-n ip_address\tallow access from only this subnet. (i.e. 192.168.0.)\n");
+ printf("\t-i ip_address\tonly listen on this address\n");
+ printf("\t-p port\t\tlisten on 'port'\n");
+ printf("\t-l logfile\tlog to 'logfile'\n");
+#ifdef HAVE_SYSLOG_H
+ printf("\t-S\t\tlog using the syslog instead\n");
+#endif
+ printf("\t-r\t\trestrict the log to only errors\n");
+ printf
+ ("\t-w load\t\tstop accepting new connections at 'load'. 0 disables\n");
+ printf("\t-s stathost\tset stathost to 'stathost'\n");
+ printf("\t-u user\t\tchange to user after startup. \"\" disables\n");
+ printf("\t-a header\tallow 'header' through the anon block\n");
+#ifdef FILTER_ENABLE
+ printf("\t-f filterfile\tblock sites specified in filterfile\n");
+#endif /* FILTER_ENABLE */
+#ifdef XTINYPROXY
+ printf
+ ("\t-x domain\tAdd a XTinyproxy header with the peer's IP address\n");
+#endif
+
+ /* Display the modes compiled into tinyproxy */
+ printf("\nFeatures Compiled In:\n");
+#ifdef XTINYPROXY
+ printf(" XTinyproxy Header\n");
+#endif /* XTINYPROXY */
+#ifdef FILTER_ENABLE
+ printf(" Filtering\n");
+ printf(" * with Regular Expression support\n");
+#endif /* FILTER_ENABLE */
+#ifndef NDEBUG
+ printf(" Debuggin code\n");
+#endif /* NDEBUG */
+}
+
+int main(int argc, char **argv)
+{
+ int optch;
+ flag usage = FALSE, godaemon = TRUE, changeid = FALSE;
+ struct passwd *thisuser = NULL;
+
+ struct allowedhdr_s **rpallowedptr = &allowedhdrs;
+ struct allowedhdr_s *allowedptr = allowedhdrs, *newallowed;
+
+ while ((optch = getopt(argc, argv, "vh?dp:l:Sa:w:s:u:n:i:rx:f:")) !=
+ EOF) {
+ switch (optch) {
+ case 'v':
+ fprintf(stderr, "tinyproxy version " VERSION "\n");
+ exit(EX_OK);
+ break;
+ case 'p':
+ if (!(config.port = atoi(optarg))) {
+ log
+ ("bad port on commandline, defaulting to %d",
+ DEFAULT_PORT);
+ config.port = DEFAULT_PORT;
+ }
+ break;
+ case 'l':
+ if (!(config.logf_name = xstrdup(optarg))) {
+ log("bad log file, defaulting to %s",
+ DEFAULT_LOG);
+ config.logf_name = DEFAULT_LOG;
+ }
+ break;
+#ifdef HAVE_SYSLOG_H
+ case 'S': /* Use the syslog function to handle logging */
+ config.syslog = TRUE;
+ break;
+#endif
+ case 'd':
+ godaemon = FALSE;
+ break;
+ case 'w':
+ sscanf(optarg, "%f", &config.cutoffload);
+ break;
+ case 's':
+ if (!(config.stathost = xstrdup(optarg))) {
+ log("bad stathost, defaulting to %s",
+ DEFAULT_STATHOST);
+ config.stathost = DEFAULT_STATHOST;
+ }
+ break;
+ case 'u':
+ if (!(config.changeuser = xstrdup(optarg))) {
+ log("bad user name, defaulting to %s",
+ DEFAULT_USER);
+ config.changeuser = DEFAULT_USER;
+ }
+ break;
+ case 'a':
+ config.anonymous = TRUE;
+
+ while (allowedptr) {
+ rpallowedptr = &allowedptr->next;
+ allowedptr = allowedptr->next;
+ }
+
+ if (!
+ (newallowed =
+ xmalloc(sizeof(struct allowedhdr_s)))) {
+ log("tinyproxy: cannot allocate headers");
+ exit(EX_SOFTWARE);
+ }
+
+ if (!(newallowed->hdrname = xstrdup(optarg))) {
+ log("tinyproxy: cannot duplicate string");
+ exit(EX_SOFTWARE);
+ }
+
+ *rpallowedptr = newallowed;
+ newallowed->next = allowedptr;
+
+ break;
+ case 'n':
+ if (!(config.subnet = xstrdup(optarg))) {
+ log("tinyproxy: could not allocate memory");
+ exit(EX_SOFTWARE);
+ }
+ break;
+ case 'i':
+ if (!(config.ipAddr = xstrdup(optarg))) {
+ log("tinyproxy: could not allocate memory");
+ exit(EX_SOFTWARE);
+ }
+ break;
+#ifdef FILTER_ENABLE
+ case 'f':
+ if (!(config.filter = xstrdup(optarg))) {
+ log("tinyproxy: could not allocate memory");
+ }
+ break;
+#endif /* FILTER_ENABLE */
+ case 'r':
+ config.restricted = TRUE;
+ break;
+#ifdef XTINYPROXY
+ case 'x':
+ if (!(config.my_domain = xstrdup(optarg))) {
+ log("tinyproxy: could not allocate memory");
+ exit(EX_SOFTWARE);
+ }
+ break;
+#endif
+ case '?':
+ case 'h':
+ default:
+ usage = TRUE;
+ break;
+ }
+ }
+
+ if (usage == TRUE) {
+ usagedisp();
+ exit(EX_OK);
+ }
+
+ /* chris - Initialise asynchronous DNS */
+ if (adns_init(&adns, 0, 0)) {
+ log("tinyproxy: could not initialise ADNS");
+ exit(EX_SOFTWARE);
+ }
+
+ /* Open the log file if not using syslog */
+ if (config.syslog == FALSE) {
+ if (!(config.logf = fopen(config.logf_name, "a"))) {
+ fprintf(stderr,
+ "Unable to open logfile %s for appending!\n",
+ config.logf_name);
+ exit(EX_CANTCREAT);
+ }
+ } else {
+ if (godaemon == TRUE)
+ openlog("tinyproxy", LOG_PID, LOG_DAEMON);
+ else
+ openlog("tinyproxy", LOG_PID, LOG_USER);
+ }
+
+ log(PACKAGE " " VERSION " starting...");
+
+ if (strlen(config.changeuser)) {
+ if ((getuid() != 0) && (geteuid() != 0)) {
+ log
+ ("not running as root, therefore not changing uid/gid.");
+ } else {
+ changeid = TRUE;
+ if (!(thisuser = getpwnam(config.changeuser))) {
+ log("unable to find user \"%s\"!",
+ config.changeuser);
+ exit(EX_NOUSER);
+ }
+ log("changing to user \"%s\" (%d/%d).",
+ config.changeuser, thisuser->pw_uid,
+ thisuser->pw_gid);
+ }
+ }
+#ifdef NDEBUG
+ if (godaemon == TRUE)
+ makedaemon();
+#else
+ printf("Debugging is enabled, so you can not go daemon.\n");
+#endif
+
+ if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
+ fprintf(stderr, "Could not set SIGPIPE\n");
+ exit(EX_OSERR);
+ }
+ if (signal(SIGUSR1, takesig) == SIG_ERR) {
+ fprintf(stderr, "Could not set SIGUSR1\n");
+ exit(EX_OSERR);
+ }
+ if (signal(SIGTERM, takesig) == SIG_ERR) {
+ fprintf(stderr, "Could not set SIGTERM\n");
+ exit(EX_OSERR);
+ }
+ if (signal(SIGHUP, takesig) == SIG_ERR) {
+ fprintf(stderr, "Could not set SIGHUP\n");
+ exit(EX_OSERR);
+ }
+ if (signal(SIGALRM, takesig) == SIG_ERR) {
+ fprintf(stderr, "Could not set SIGALRM\n");
+ exit(EX_OSERR);
+ }
+ alarm(LOAD_RECALCTIMER);
+ calcload();
+
+ if (init_listen_sock(config.port) < 0) {
+ log("unable to bind port %d!", config.port);
+ exit(EX_UNAVAILABLE);
+ }
+ if (changeid == TRUE) {
+ setuid(thisuser->pw_uid);
+ setgid(thisuser->pw_gid);
+ }
+ log("now accepting connections.");
+
+#ifdef FILTER_ENABLE
+ if (config.filter)
+ filter_init();
+#endif /* FILTER_ENABLE */
+
+ while (config.quit == FALSE) {
+ if (getreqs() < 0)
+ break;
+ }
+
+#ifdef FILTER_ENABLE
+ if (config.filter)
+ filter_destroy();
+#endif /* FILTER_ENABLE */
+
+ log("shutting down.");
+ de_init_listen_sock();
+
+ if (config.syslog == FALSE)
+ fclose(config.logf);
+ else
+ closelog();
+
+ allowedptr = allowedhdrs;
+ while (allowedptr) {
+ struct allowedhdr_s *delptr = NULL;
+ delptr = allowedptr;
+ safefree(delptr->hdrname);
+ allowedptr = delptr->next;
+ safefree(delptr);
+ }
+
+ /* finsih up ADNS */
+ adns_finish(adns);
+
+ exit(EX_OK);
+}
diff --git a/src/tinyproxy.h b/src/tinyproxy.h
new file mode 100644
index 0000000..218d729
--- /dev/null
+++ b/src/tinyproxy.h
@@ -0,0 +1,87 @@
+/* $Id: tinyproxy.h,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * See 'tinyproxy.c' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _TINYPROXY_H_
+#define _TINYPROXY_H_ 1
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <time.h>
+
+#include "config.h"
+
+/* Global variables for the main controls of the program */
+#define BUFFER (1024 * 2) /* Size of buffer for reading */
+#define MAXLISTEN 128 /* Max number of connections to listen for */
+
+/* Make a new type: flag */
+typedef char flag;
+
+/* Other stuff */
+#define FALSE (0)
+#define TRUE (!FALSE)
+
+struct config_s {
+ FILE *logf;
+ char *logf_name;
+ flag syslog;
+ float cutoffload;
+ int port;
+ char *stathost;
+ flag quit;
+ char *changeuser;
+ flag anonymous;
+ char *subnet;
+ char *ipAddr;
+#ifdef FILTER_ENABLE
+ char *filter;
+#endif /* FILTER_ENABLE */
+ flag restricted;
+#ifdef XTINYPROXY
+ char *my_domain;
+#endif
+};
+
+struct stat_s {
+ unsigned long int num_reqs;
+ unsigned long int num_cons;
+ unsigned long int num_badcons;
+ unsigned long int num_opens;
+ unsigned long int num_listens;
+ unsigned long int num_tx;
+ unsigned long int num_rx;
+ unsigned long int num_garbage;
+ unsigned long int num_idles;
+ unsigned long int num_refused;
+};
+
+struct allowedhdr_s {
+ char *hdrname;
+ struct allowedhdr_s *next;
+};
+
+/* Global Structures used in the program */
+extern struct config_s config;
+extern struct stat_s stats;
+extern struct allowedhdr_s *allowedhdrs;
+extern float load;
+
+#endif
diff --git a/src/uri.c b/src/uri.c
new file mode 100644
index 0000000..8a2b744
--- /dev/null
+++ b/src/uri.c
@@ -0,0 +1,124 @@
+/* $Id: uri.c,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
+ *
+ * This borrows the REGEX from RFC2396 to split a URI string into the five
+ * primary components. The components are:
+ * scheme the uri method (like "http", "ftp", "gopher")
+ * authority the domain and optional ":" port
+ * path path to the document/resource
+ * query an optional query (separated with a "?")
+ * fragment an optional fragement (separated with a "#")
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "uri.h"
+#include "utils.h"
+#include "log.h"
+#include "regexp.h"
+
+#define NMATCH 10
+
+#define URIPATTERN "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
+
+#define SCHEME 2
+#define AUTHORITY 4
+#define PATH 5
+#define QUERY_MARK 6
+#define QUERY 7
+#define FRAGMENT_MARK 8
+#define FRAGMENT 9
+
+static int extract_uri(regmatch_t pmatch[], const char *buffer, char **section,
+ int substring)
+{
+ int len = pmatch[substring].rm_eo - pmatch[substring].rm_so;
+ if ((*section = xmalloc(len + 1)) == NULL)
+ return -1;
+
+ memset(*section, '\0', len + 1);
+ memcpy(*section, buffer + pmatch[substring].rm_so, len);
+
+ return 0;
+}
+
+void free_uri(URI * uri)
+{
+ safefree(uri->scheme);
+ safefree(uri->authority);
+ safefree(uri->path);
+ safefree(uri->query);
+ safefree(uri->fragment);
+ safefree(uri);
+}
+
+URI *explode_uri(const char *string)
+{
+ URI *uri;
+ regmatch_t pmatch[NMATCH];
+ regex_t preg;
+
+ if (!(uri = xmalloc(sizeof(URI))))
+ return NULL;
+ memset(uri, 0, sizeof(URI));
+
+ if (regcomp(&preg, URIPATTERN, REG_EXTENDED) != 0) {
+ log("ERROR explode_uri: regcomp");
+ goto ERROR_EXIT;
+ }
+
+ if (regexec(&preg, string, NMATCH, pmatch, 0) != 0) {
+ log("ERROR explode_uri: regexec");
+ goto ERROR_EXIT;
+ }
+
+ regfree(&preg);
+
+ if (pmatch[SCHEME].rm_so != -1) {
+ if (extract_uri(pmatch, string, &uri->scheme, SCHEME) < 0)
+ goto ERROR_EXIT;
+ }
+
+ if (pmatch[AUTHORITY].rm_so != -1) {
+ if (extract_uri(pmatch, string, &uri->authority, AUTHORITY) <
+ 0) goto ERROR_EXIT;
+ }
+
+ if (pmatch[PATH].rm_so != -1) {
+ if (extract_uri(pmatch, string, &uri->path, PATH) < 0)
+ goto ERROR_EXIT;
+ }
+
+ if (pmatch[QUERY_MARK].rm_so != -1) {
+ if (extract_uri(pmatch, string, &uri->query, QUERY) < 0)
+ goto ERROR_EXIT;
+ }
+
+ if (pmatch[FRAGMENT_MARK].rm_so != -1) {
+ if (extract_uri(pmatch, string, &uri->fragment, FRAGMENT) < 0)
+ goto ERROR_EXIT;
+ }
+
+ return uri;
+
+ ERROR_EXIT:
+ free_uri(uri);
+ return NULL;
+}
diff --git a/src/uri.h b/src/uri.h
new file mode 100644
index 0000000..bb18d15
--- /dev/null
+++ b/src/uri.h
@@ -0,0 +1,32 @@
+/* $Id: uri.h,v 1.1.1.1 2000-02-16 17:32:24 sdyoung Exp $
+ *
+ * See 'uri.c' for a detailed description.
+ *
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __URI_H_
+#define __URI_H_
+
+typedef struct {
+ char *scheme;
+ char *authority;
+ char *path;
+ char *query;
+ char *fragment;
+} URI;
+
+extern URI *explode_uri(const char *string);
+extern void free_uri(URI * uri);
+
+#endif
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..1a91d7a
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,264 @@
+/* $Id: utils.c,v 1.1.1.1 2000-02-16 17:32:24 sdyoung Exp $
+ *
+ * Misc. routines which are used by the various functions to handle strings
+ * and memory allocation and pretty much anything else we can think of. Also,
+ * the load cutoff routine is in here, along with the HTML show stats
+ * function. Could not think of a better place for it, so it's in here.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <defines.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <ctype.h>
+#include <sysexits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <assert.h>
+
+#include "config.h"
+#include "tinyproxy.h"
+#include "utils.h"
+#include "log.h"
+#include "conns.h"
+#include "buffer.h"
+
+char *xstrdup(char *st)
+{
+ char *p;
+
+ assert(st);
+
+ if (!(p = strdup(st))) {
+ log("ERROR xstrdup: out of memory (%s)", strerror(errno));
+ return NULL;
+ } else {
+ return p;
+ }
+}
+
+/*
+ * Find the start of the needle in the haystack. Limits the search to less
+ * than "length" characters. Returns NULL if the needle is not found.
+ */
+char *xstrstr(char *haystack, char *needle, unsigned int length,
+ int case_sensitive)
+{
+ unsigned int i;
+ /* Used to specify which function to use... need the decl. */
+ int (*fn) (const char *s1, const char *s2, unsigned int n);
+
+ assert(haystack);
+ assert(needle);
+ assert(length > 0);
+ assert(case_sensitive == FALSE || case_sensitive == TRUE);
+
+ if (case_sensitive)
+ fn = strncmp;
+ else
+ fn = strncasecmp;
+
+ if (strlen(needle) > length)
+ return NULL;
+
+ for (i = 0; i <= length - strlen(needle); i++) {
+ if ((*fn) (haystack + i, needle, strlen(needle)) == 0)
+ return haystack + i;
+
+ }
+
+ return NULL;
+}
+
+/*
+ * for-sure malloc
+ */
+void *xmalloc(unsigned long int sz)
+{
+ void *p;
+
+ assert(sz > 0);
+
+ if (!(p = malloc((size_t) sz))) {
+ log("ERROR xmalloc: out of memory (%s)", strerror(errno));
+ return NULL;
+ }
+ return p;
+}
+
+#ifdef USE_PROC
+int calcload(void)
+{
+ char buf[BUFFER], *p;
+ FILE *f;
+
+ if (!config.cutoffload) {
+ return -1;
+ }
+
+ if (!(f = fopen("/proc/loadavg", "rt"))) {
+ log("unable to read /proc/loadavg");
+ config.cutoffload = 0.0;
+ return -1;
+ }
+ fgets(buf, BUFFER, f);
+ p = strchr(buf, ' ');
+ *p = '\0';
+ load = atof(buf);
+ fclose(f);
+ return 0;
+}
+
+#else
+int calcload(void)
+{
+ FILE *f;
+ char buf[BUFFER];
+ char *p, *y;
+
+ if (!config.cutoffload) {
+ return -1;
+ }
+
+ if (!(f = popen(UPTIME_PATH, "r"))) {
+ log("calcload: unable to exec uptime");
+ config.cutoffload = 0.0;
+ return -1;
+ }
+ fgets(buf, BUFFER, f);
+ p = strrchr(buf, ':');
+ p += 2;
+ y = strchr(p, ',');
+ *y = '\0';
+ load = atof(p);
+ pclose(f);
+ return 0;
+}
+
+#endif
+
+/*
+ * Delete the server's buffer and replace it with a premade message which will
+ * be sent to the client.
+ */
+static void update_output_buffer(struct conn_s *connptr, char *outbuf)
+{
+ assert(connptr);
+ assert(outbuf);
+
+ delete_buffer(connptr->sbuffer);
+ connptr->sbuffer = new_buffer();
+
+ push_buffer(connptr->sbuffer, outbuf, strlen(outbuf));
+ shutdown(connptr->server_fd, 2);
+ connptr->type = CLOSINGCONN;
+}
+
+/*
+ * Display the statics of the tinyproxy server.
+ */
+int showstats(struct conn_s *connptr)
+{
+ char *outbuf;
+ static char *msg = "HTTP/1.0 200 OK\r\n" \
+ "Content-type: text/html\r\n\r\n" \
+ "<html><head><title>%s stats</title></head>\r\n" \
+ "<body>\r\n" \
+ "<center><h2>%s run-time statistics</h2></center><hr>\r\n" \
+ "<blockquote>\r\n" \
+ "Number of requests: %lu<br>\r\n" \
+ "Number of connections: %lu<br>\r\n" \
+ "Number of bad connections: %lu<br>\r\n" \
+ "Number of opens: %lu<br>\r\n" \
+ "Number of listens: %lu<br>\r\n" \
+ "Number of bytes (tx): %lu<br>\r\n" \
+ "Number of bytes (rx): %lu<br>\r\n" \
+ "Number of garbage collects:%lu<br>\r\n" \
+ "Number of idle connection kills:%lu<br>\r\n" \
+ "Number of refused connections due to high load:%lu<br>\r\n" \
+ "Current system load average:%.2f" \
+ "(recalculated every % lu seconds)<br>\r\n" \
+ "</blockquote>\r\n</body></html>\r\n";
+
+ assert(connptr);
+
+ outbuf = xmalloc(BUFFER);
+
+ sprintf(outbuf, msg, VERSION, VERSION, stats.num_reqs,
+ stats.num_cons, stats.num_badcons, stats.num_opens,
+ stats.num_listens, stats.num_tx, stats.num_rx,
+ stats.num_garbage, stats.num_idles, stats.num_refused, load,
+ LOAD_RECALCTIMER);
+
+ update_output_buffer(connptr, outbuf);
+
+ return 0;
+}
+
+/*
+ * Display an error to the client.
+ */
+int httperr(struct conn_s *connptr, int err, char *msg)
+{
+ char *outbuf;
+ static char *premsg = "HTTP/1.0 %d %s\r\n" \
+ "Content-type: text/html\r\n\r\n" \
+ "<html><head><title>%s</title></head>\r\n" \
+ "<body>\r\n" \
+ "<font size=\"+2\">Cache Error!</font><br>\r\n" \
+ "An error of type %d occurred: %s\r\n" \
+ "<hr>\r\n" \
+ "<font size=\"-1\"><em>Generated by %s</em></font>\r\n" \
+ "</body></html>\r\n";
+
+ assert(connptr);
+ assert(err > 0);
+ assert(msg);
+
+ outbuf = xmalloc(BUFFER);
+ sprintf(outbuf, premsg, err, msg, msg, err, msg, VERSION);
+
+ update_output_buffer(connptr, outbuf);
+
+ return 0;
+}
+
+void makedaemon(void)
+{
+ if (fork() != 0)
+ exit(0);
+
+ setsid();
+ signal(SIGHUP, SIG_IGN);
+
+ if (fork() != 0)
+ exit(0);
+
+ chdir("/");
+ umask(0);
+
+ close(0);
+ close(1);
+ close(2);
+}
diff --git a/src/utils.h b/src/utils.h
new file mode 100644
index 0000000..76a3565
--- /dev/null
+++ b/src/utils.h
@@ -0,0 +1,38 @@
+/* $Id: utils.h,v 1.1.1.1 2000-02-16 17:32:24 sdyoung Exp $
+ *
+ * See 'utils.h' for a detailed description.
+ *
+ * Copyright (C) 1998 Steven Young
+ * Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __UTILS_H_
+#define __UTILS_H_ 1
+
+#include "conns.h"
+
+#define safefree(x) free(x); x = NULL
+
+extern char *xstrdup(char *st);
+extern void *xmalloc(unsigned long int sz);
+extern char *xstrstr(char *haystack, char *needle, unsigned int length,
+ int case_sensitive);
+
+extern int showstats(struct conn_s *connptr);
+extern int httperr(struct conn_s *connptr, int err, char *msg);
+
+extern int calcload(void);
+
+extern void makedaemon(void);
+
+#endif